summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/Kconfig6
-rw-r--r--net/6lowpan/Makefile3
-rw-r--r--net/6lowpan/iphc.c (renamed from net/ieee802154/6lowpan_iphc.c)296
-rw-r--r--net/802/fc.c2
-rw-r--r--net/802/fddi.c3
-rw-r--r--net/802/hippi.c3
-rw-r--r--net/8021q/vlan.c24
-rw-r--r--net/8021q/vlan_core.c53
-rw-r--r--net/8021q/vlan_dev.c5
-rw-r--r--net/8021q/vlanproc.c2
-rw-r--r--net/9p/client.c1
-rw-r--r--net/Kconfig9
-rw-r--r--net/Makefile3
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/appletalk/dev.c3
-rw-r--r--net/atm/br2684.c4
-rw-r--r--net/atm/clip.c9
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/lec.c7
-rw-r--r--net/atm/mpc.c2
-rw-r--r--net/atm/svc.c60
-rw-r--r--net/batman-adv/bat_iv_ogm.c13
-rw-r--r--net/batman-adv/distributed-arp-table.c3
-rw-r--r--net/batman-adv/fragmentation.c10
-rw-r--r--net/batman-adv/hash.c6
-rw-r--r--net/batman-adv/main.h22
-rw-r--r--net/batman-adv/multicast.c1
-rw-r--r--net/batman-adv/routing.c18
-rw-r--r--net/batman-adv/soft-interface.c4
-rw-r--r--net/batman-adv/sysfs.c24
-rw-r--r--net/bluetooth/6lowpan.c1058
-rw-r--r--net/bluetooth/6lowpan.h47
-rw-r--r--net/bluetooth/Kconfig7
-rw-r--r--net/bluetooth/Makefile4
-rw-r--r--net/bluetooth/a2mp.c8
-rw-r--r--net/bluetooth/af_bluetooth.c5
-rw-r--r--net/bluetooth/amp.c19
-rw-r--r--net/bluetooth/bnep/core.c5
-rw-r--r--net/bluetooth/cmtp/capi.c6
-rw-r--r--net/bluetooth/hci_conn.c253
-rw-r--r--net/bluetooth/hci_core.c1308
-rw-r--r--net/bluetooth/hci_event.c856
-rw-r--r--net/bluetooth/hci_sock.c33
-rw-r--r--net/bluetooth/hidp/core.c12
-rw-r--r--net/bluetooth/l2cap_core.c570
-rw-r--r--net/bluetooth/l2cap_sock.c76
-rw-r--r--net/bluetooth/lib.c14
-rw-r--r--net/bluetooth/mgmt.c1488
-rw-r--r--net/bluetooth/rfcomm/core.c10
-rw-r--r--net/bluetooth/rfcomm/sock.c3
-rw-r--r--net/bluetooth/sco.c125
-rw-r--r--net/bluetooth/smp.c1089
-rw-r--r--net/bluetooth/smp.h27
-rw-r--r--net/bridge/Makefile4
-rw-r--r--net/bridge/br.c14
-rw-r--r--net/bridge/br_device.c12
-rw-r--r--net/bridge/br_fdb.c26
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c22
-rw-r--r--net/bridge/br_input.c1
-rw-r--r--net/bridge/br_multicast.c41
-rw-r--r--net/bridge/br_netfilter.c132
-rw-r--r--net/bridge/br_netlink.c117
-rw-r--r--net/bridge/br_nf_core.c96
-rw-r--r--net/bridge/br_private.h45
-rw-r--r--net/bridge/br_stp.c15
-rw-r--r--net/bridge/br_stp_if.c4
-rw-r--r--net/bridge/br_stp_timer.c4
-rw-r--r--net/bridge/br_sysfs_br.c21
-rw-r--r--net/bridge/br_vlan.c171
-rw-r--r--net/bridge/netfilter/Kconfig25
-rw-r--r--net/bridge/netfilter/Makefile5
-rw-r--r--net/bridge/netfilter/ebt_log.c47
-rw-r--r--net/bridge/netfilter/ebt_ulog.c393
-rw-r--r--net/bridge/netfilter/ebtables.c25
-rw-r--r--net/bridge/netfilter/nf_log_bridge.c96
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c2
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c152
-rw-r--r--net/caif/caif_socket.c3
-rw-r--r--net/caif/cfctrl.c1
-rw-r--r--net/ceph/auth_x.c256
-rw-r--r--net/ceph/messenger.c47
-rw-r--r--net/ceph/mon_client.c8
-rw-r--r--net/ceph/osd_client.c129
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c603
-rw-r--r--net/core/dev_ioctl.c7
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/ethtool.c82
-rw-r--r--net/core/filter.c772
-rw-r--r--net/core/flow_dissector.c214
-rw-r--r--net/core/gen_estimator.c31
-rw-r--r--net/core/gen_stats.c114
-rw-r--r--net/core/net-sysfs.c154
-rw-r--r--net/core/net_namespace.c12
-rw-r--r--net/core/netclassid_cgroup.c2
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/pktgen.c187
-rw-r--r--net/core/ptp_classifier.c70
-rw-r--r--net/core/request_sock.c43
-rw-r--r--net/core/rtnetlink.c176
-rw-r--r--net/core/secure_seq.c6
-rw-r--r--net/core/skbuff.c496
-rw-r--r--net/core/sock.c198
-rw-r--r--net/core/sock_diag.c4
-rw-r--r--net/core/timestamping.c84
-rw-r--r--net/core/user_dma.c131
-rw-r--r--net/core/utils.c12
-rw-r--r--net/dcb/dcbnl.c13
-rw-r--r--net/dccp/ccid.c2
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/dccp/minisocks.c4
-rw-r--r--net/dccp/proto.c8
-rw-r--r--net/decnet/af_decnet.c3
-rw-r--r--net/decnet/dn_dev.c3
-rw-r--r--net/decnet/dn_timer.c3
-rw-r--r--net/dsa/Kconfig3
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa.c189
-rw-r--r--net/dsa/dsa_priv.h29
-rw-r--r--net/dsa/slave.c308
-rw-r--r--net/dsa/tag_brcm.c171
-rw-r--r--net/dsa/tag_dsa.c9
-rw-r--r--net/dsa/tag_edsa.c9
-rw-r--r--net/dsa/tag_trailer.c9
-rw-r--r--net/ethernet/eth.c37
-rw-r--r--net/hsr/Makefile3
-rw-r--r--net/hsr/hsr_device.c580
-rw-r--r--net/hsr/hsr_device.h12
-rw-r--r--net/hsr/hsr_forward.c368
-rw-r--r--net/hsr/hsr_forward.h20
-rw-r--r--net/hsr/hsr_framereg.c477
-rw-r--r--net/hsr/hsr_framereg.h45
-rw-r--r--net/hsr/hsr_main.c425
-rw-r--r--net/hsr/hsr_main.h61
-rw-r--r--net/hsr/hsr_netlink.c102
-rw-r--r--net/hsr/hsr_netlink.h11
-rw-r--r--net/hsr/hsr_slave.c196
-rw-r--r--net/hsr/hsr_slave.h38
-rw-r--r--net/ieee802154/6lowpan_rtnl.c145
-rw-r--r--net/ieee802154/Kconfig9
-rw-r--r--net/ieee802154/Makefile5
-rw-r--r--net/ieee802154/af_ieee802154.c26
-rw-r--r--net/ieee802154/dgram.c28
-rw-r--r--net/ieee802154/ieee802154.h2
-rw-r--r--net/ieee802154/netlink.c4
-rw-r--r--net/ieee802154/nl-mac.c48
-rw-r--r--net/ieee802154/nl-phy.c23
-rw-r--r--net/ieee802154/raw.c14
-rw-r--r--net/ieee802154/reassembly.c88
-rw-r--r--net/ieee802154/wpan-class.c10
-rw-r--r--net/ipv4/Kconfig55
-rw-r--r--net/ipv4/Makefile4
-rw-r--r--net/ipv4/af_inet.c47
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/arp.c6
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/datagram.c1
-rw-r--r--net/ipv4/devinet.c36
-rw-r--r--net/ipv4/fib_frontend.c14
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/fou.c514
-rw-r--r--net/ipv4/geneve.c373
-rw-r--r--net/ipv4/gre_demux.c9
-rw-r--r--net/ipv4/gre_offload.c57
-rw-r--r--net/ipv4/icmp.c70
-rw-r--r--net/ipv4/igmp.c37
-rw-r--r--net/ipv4/inet_fragment.c318
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/inetpeer.c21
-rw-r--r--net/ipv4/ip_fragment.c91
-rw-r--r--net/ipv4/ip_gre.c94
-rw-r--r--net/ipv4/ip_options.c6
-rw-r--r--net/ipv4/ip_output.c23
-rw-r--r--net/ipv4/ip_sockglue.c21
-rw-r--r--net/ipv4/ip_tunnel.c148
-rw-r--r--net/ipv4/ip_vti.c56
-rw-r--r--net/ipv4/ipconfig.c8
-rw-r--r--net/ipv4/ipip.c82
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/Kconfig168
-rw-r--r--net/ipv4/netfilter/Makefile12
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c108
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c498
-rw-r--r--net/ipv4/netfilter/iptable_nat.c233
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c149
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c385
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c203
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c153
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c127
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c157
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c77
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c1
-rw-r--r--net/ipv4/ping.c4
-rw-r--r--net/ipv4/proc.c5
-rw-r--r--net/ipv4/protocol.c1
-rw-r--r--net/ipv4/raw.c9
-rw-r--r--net/ipv4/route.c24
-rw-r--r--net/ipv4/syncookies.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c49
-rw-r--r--net/ipv4/tcp.c227
-rw-r--r--net/ipv4/tcp_bic.c11
-rw-r--r--net/ipv4/tcp_cong.c55
-rw-r--r--net/ipv4/tcp_cubic.c18
-rw-r--r--net/ipv4/tcp_dctcp.c344
-rw-r--r--net/ipv4/tcp_diag.c5
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_highspeed.c145
-rw-r--r--net/ipv4/tcp_htcp.c6
-rw-r--r--net/ipv4/tcp_hybla.c1
-rw-r--r--net/ipv4/tcp_illinois.c3
-rw-r--r--net/ipv4/tcp_input.c573
-rw-r--r--net/ipv4/tcp_ipv4.c328
-rw-r--r--net/ipv4/tcp_memcontrol.c4
-rw-r--r--net/ipv4/tcp_metrics.c7
-rw-r--r--net/ipv4/tcp_minisocks.c15
-rw-r--r--net/ipv4/tcp_offload.c90
-rw-r--r--net/ipv4/tcp_output.c162
-rw-r--r--net/ipv4/tcp_probe.c6
-rw-r--r--net/ipv4/tcp_scalable.c2
-rw-r--r--net/ipv4/tcp_timer.c56
-rw-r--r--net/ipv4/tcp_vegas.c6
-rw-r--r--net/ipv4/tcp_veno.c3
-rw-r--r--net/ipv4/tcp_westwood.c35
-rw-r--r--net/ipv4/tcp_yeah.c9
-rw-r--r--net/ipv4/udp.c169
-rw-r--r--net/ipv4/udp_offload.c219
-rw-r--r--net/ipv4/udp_tunnel.c108
-rw-r--r--net/ipv4/xfrm4_protocol.c2
-rw-r--r--net/ipv6/Makefile4
-rw-r--r--net/ipv6/addrconf.c205
-rw-r--r--net/ipv6/addrconf_core.c7
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/ipv6/af_inet6.c27
-rw-r--r--net/ipv6/ah6.c23
-rw-r--r--net/ipv6/anycast.c133
-rw-r--r--net/ipv6/datagram.c24
-rw-r--r--net/ipv6/esp6.c15
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/icmp.c47
-rw-r--r--net/ipv6/inet6_connection_sock.c6
-rw-r--r--net/ipv6/inet6_hashtables.c7
-rw-r--r--net/ipv6/ip6_fib.c138
-rw-r--r--net/ipv6/ip6_flowlabel.c19
-rw-r--r--net/ipv6/ip6_gre.c30
-rw-r--r--net/ipv6/ip6_icmp.c2
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c34
-rw-r--r--net/ipv6/ip6_output.c63
-rw-r--r--net/ipv6/ip6_tunnel.c48
-rw-r--r--net/ipv6/ip6_udp_tunnel.c107
-rw-r--r--net/ipv6/ip6_vti.c63
-rw-r--r--net/ipv6/ip6mr.c6
-rw-r--r--net/ipv6/ipcomp6.c6
-rw-r--r--net/ipv6/ipv6_sockglue.c39
-rw-r--r--net/ipv6/mcast.c316
-rw-r--r--net/ipv6/mip6.c10
-rw-r--r--net/ipv6/ndisc.c75
-rw-r--r--net/ipv6/netfilter/Kconfig64
-rw-r--r--net/ipv6/netfilter/Makefile10
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c76
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c1
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c233
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c49
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c417
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c203
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c120
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c163
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c165
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c77
-rw-r--r--net/ipv6/output_core.c2
-rw-r--r--net/ipv6/proc.c6
-rw-r--r--net/ipv6/protocol.c1
-rw-r--r--net/ipv6/raw.c22
-rw-r--r--net/ipv6/reassembly.c102
-rw-r--r--net/ipv6/route.c26
-rw-r--r--net/ipv6/sit.c133
-rw-r--r--net/ipv6/syncookies.c6
-rw-r--r--net/ipv6/sysctl_net_ipv6.c19
-rw-r--r--net/ipv6/tcp_ipv6.c278
-rw-r--r--net/ipv6/tcpv6_offload.c69
-rw-r--r--net/ipv6/tunnel6.c4
-rw-r--r--net/ipv6/udp.c157
-rw-r--r--net/ipv6/udp_offload.c92
-rw-r--r--net/ipv6/xfrm6_input.c6
-rw-r--r--net/ipv6/xfrm6_output.c1
-rw-r--r--net/ipv6/xfrm6_policy.c22
-rw-r--r--net/ipv6/xfrm6_state.c14
-rw-r--r--net/ipv6/xfrm6_tunnel.c4
-rw-r--r--net/irda/af_irda.c1
-rw-r--r--net/irda/ircomm/ircomm_tty.c6
-rw-r--r--net/irda/irda_device.c3
-rw-r--r--net/irda/irlan/irlan_common.c5
-rw-r--r--net/irda/irlan/irlan_eth.c2
-rw-r--r--net/irda/irlap_frame.c2
-rw-r--r--net/irda/irlmp.c3
-rw-r--r--net/iucv/af_iucv.c4
-rw-r--r--net/iucv/iucv.c9
-rw-r--r--net/key/af_key.c3
-rw-r--r--net/l2tp/Kconfig1
-rw-r--r--net/l2tp/l2tp_core.c110
-rw-r--r--net/l2tp/l2tp_eth.c3
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c3
-rw-r--r--net/llc/af_llc.c10
-rw-r--r--net/mac80211/Kconfig17
-rw-r--r--net/mac80211/Makefile6
-rw-r--r--net/mac80211/agg-rx.c111
-rw-r--r--net/mac80211/agg-tx.c8
-rw-r--r--net/mac80211/cfg.c672
-rw-r--r--net/mac80211/chan.c1016
-rw-r--r--net/mac80211/debugfs.c6
-rw-r--r--net/mac80211/debugfs_netdev.c4
-rw-r--r--net/mac80211/debugfs_sta.c9
-rw-r--r--net/mac80211/driver-ops.h22
-rw-r--r--net/mac80211/ethtool.c244
-rw-r--r--net/mac80211/ht.c10
-rw-r--r--net/mac80211/ibss.c18
-rw-r--r--net/mac80211/ieee80211_i.h117
-rw-r--r--net/mac80211/iface.c39
-rw-r--r--net/mac80211/key.c18
-rw-r--r--net/mac80211/main.c11
-rw-r--r--net/mac80211/mesh.c4
-rw-r--r--net/mac80211/mesh_hwmp.c1
-rw-r--r--net/mac80211/mesh_pathtbl.c4
-rw-r--r--net/mac80211/mesh_plink.c51
-rw-r--r--net/mac80211/mlme.c321
-rw-r--r--net/mac80211/offchannel.c6
-rw-r--r--net/mac80211/pm.c6
-rw-r--r--net/mac80211/rate.h13
-rw-r--r--net/mac80211/rc80211_minstrel.c98
-rw-r--r--net/mac80211/rc80211_minstrel.h43
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c19
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c303
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h41
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c10
-rw-r--r--net/mac80211/rc80211_pid.h278
-rw-r--r--net/mac80211/rc80211_pid_algo.c478
-rw-r--r--net/mac80211/rc80211_pid_debugfs.c228
-rw-r--r--net/mac80211/rx.c80
-rw-r--r--net/mac80211/scan.c121
-rw-r--r--net/mac80211/sta_info.c214
-rw-r--r--net/mac80211/sta_info.h30
-rw-r--r--net/mac80211/status.c29
-rw-r--r--net/mac80211/tdls.c677
-rw-r--r--net/mac80211/trace.h11
-rw-r--r--net/mac80211/tx.c202
-rw-r--r--net/mac80211/util.c311
-rw-r--r--net/mac80211/vht.c4
-rw-r--r--net/mac80211/wep.c20
-rw-r--r--net/mac80211/wme.c5
-rw-r--r--net/mac80211/wpa.c9
-rw-r--r--net/mac802154/ieee802154_dev.c70
-rw-r--r--net/mac802154/llsec.c1
-rw-r--r--net/mac802154/mib.c7
-rw-r--r--net/mac802154/rx.c5
-rw-r--r--net/mac802154/tx.c16
-rw-r--r--net/mac802154/wpan.c18
-rw-r--r--net/mpls/mpls_gso.c7
-rw-r--r--net/netfilter/Kconfig26
-rw-r--r--net/netfilter/Makefile6
-rw-r--r--net/netfilter/core.c17
-rw-r--r--net/netfilter/ipset/Kconfig9
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c15
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c15
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c15
-rw-r--r--net/netfilter/ipset/ip_set_core.c55
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h30
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c173
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c20
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c29
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c22
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c23
-rw-r--r--net/netfilter/ipvs/Kconfig10
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c74
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c328
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_fo.c79
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c394
-rw-r--r--net/netfilter/nf_conntrack_core.c72
-rw-r--r--net/netfilter/nf_conntrack_ecache.c96
-rw-r--r--net/netfilter/nf_conntrack_expect.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c26
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_log.c155
-rw-r--r--net/netfilter/nf_log_common.c187
-rw-r--r--net/netfilter/nf_nat_core.c7
-rw-r--r--net/netfilter/nf_nat_proto_common.c2
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c2
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c2
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c2
-rw-r--r--net/netfilter/nf_nat_proto_udp.c2
-rw-r--r--net/netfilter/nf_nat_proto_udplite.c2
-rw-r--r--net/netfilter/nf_queue.c4
-rw-r--r--net/netfilter/nf_sockopt.c8
-rw-r--r--net/netfilter/nf_tables_api.c769
-rw-r--r--net/netfilter/nfnetlink.c70
-rw-r--r--net/netfilter/nfnetlink_acct.c66
-rw-r--r--net/netfilter/nfnetlink_log.c12
-rw-r--r--net/netfilter/nfnetlink_queue_core.c12
-rw-r--r--net/netfilter/nft_compat.c116
-rw-r--r--net/netfilter/nft_hash.c287
-rw-r--r--net/netfilter/nft_log.c98
-rw-r--r--net/netfilter/nft_masq.c59
-rw-r--r--net/netfilter/nft_meta.c45
-rw-r--r--net/netfilter/nft_nat.c16
-rw-r--r--net/netfilter/nft_rbtree.c2
-rw-r--r--net/netfilter/nft_reject.c35
-rw-r--r--net/netfilter/nft_reject_inet.c94
-rw-r--r--net/netfilter/x_tables.c100
-rw-r--r--net/netfilter/xt_HMARK.c2
-rw-r--r--net/netfilter/xt_LED.c14
-rw-r--r--net/netfilter/xt_LOG.c884
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/netfilter/xt_bpf.c6
-rw-r--r--net/netfilter/xt_cgroup.c2
-rw-r--r--net/netfilter/xt_cluster.c3
-rw-r--r--net/netfilter/xt_connbytes.c2
-rw-r--r--net/netfilter/xt_hashlimit.c35
-rw-r--r--net/netfilter/xt_physdev.c3
-rw-r--r--net/netfilter/xt_set.c191
-rw-r--r--net/netfilter/xt_string.c1
-rw-r--r--net/netlabel/netlabel_kapi.c2
-rw-r--r--net/netlabel/netlabel_user.c6
-rw-r--r--net/netlink/af_netlink.c313
-rw-r--r--net/netlink/af_netlink.h19
-rw-r--r--net/netlink/diag.c14
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/digital.h3
-rw-r--r--net/nfc/digital_core.c27
-rw-r--r--net/nfc/digital_dep.c114
-rw-r--r--net/nfc/digital_technology.c96
-rw-r--r--net/nfc/hci/core.c7
-rw-r--r--net/nfc/nci/core.c21
-rw-r--r--net/nfc/nci/data.c7
-rw-r--r--net/nfc/nci/ntf.c44
-rw-r--r--net/openvswitch/Kconfig11
-rw-r--r--net/openvswitch/Makefile4
-rw-r--r--net/openvswitch/actions.c268
-rw-r--r--net/openvswitch/datapath.c171
-rw-r--r--net/openvswitch/datapath.h25
-rw-r--r--net/openvswitch/flow.c123
-rw-r--r--net/openvswitch/flow.h54
-rw-r--r--net/openvswitch/flow_netlink.c292
-rw-r--r--net/openvswitch/flow_netlink.h4
-rw-r--r--net/openvswitch/vport-geneve.c235
-rw-r--r--net/openvswitch/vport-gre.c33
-rw-r--r--net/openvswitch/vport-internal_dev.c24
-rw-r--r--net/openvswitch/vport-internal_dev.h2
-rw-r--r--net/openvswitch/vport-vxlan.c32
-rw-r--r--net/openvswitch/vport.c148
-rw-r--r--net/openvswitch/vport.h41
-rw-r--r--net/packet/af_packet.c43
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/phonet/pep-gprs.c2
-rw-r--r--net/phonet/pn_dev.c6
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rds/send.c11
-rw-r--r--net/rds/tcp_connect.c5
-rw-r--r--net/rds/threads.c3
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/rfkill/rfkill-gpio.c4
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rose/rose_link.c3
-rw-r--r--net/rxrpc/ar-error.c14
-rw-r--r--net/rxrpc/ar-input.c9
-rw-r--r--net/rxrpc/ar-key.c4
-rw-r--r--net/sched/act_api.c9
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_police.c6
-rw-r--r--net/sched/cls_api.c44
-rw-r--r--net/sched/cls_basic.c89
-rw-r--r--net/sched/cls_bpf.c110
-rw-r--r--net/sched/cls_cgroup.c79
-rw-r--r--net/sched/cls_flow.c151
-rw-r--r--net/sched/cls_fw.c120
-rw-r--r--net/sched/cls_route.c241
-rw-r--r--net/sched/cls_rsvp.h208
-rw-r--r--net/sched/cls_tcindex.c275
-rw-r--r--net/sched/cls_u32.c407
-rw-r--r--net/sched/em_canid.c11
-rw-r--r--net/sched/em_ipset.c7
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/em_nbyte.c2
-rw-r--r--net/sched/em_text.c4
-rw-r--r--net/sched/ematch.c17
-rw-r--r--net/sched/sch_api.c65
-rw-r--r--net/sched/sch_atm.c28
-rw-r--r--net/sched/sch_cbq.c83
-rw-r--r--net/sched/sch_choke.c47
-rw-r--r--net/sched/sch_codel.c2
-rw-r--r--net/sched/sch_drr.c27
-rw-r--r--net/sched/sch_dsmark.c11
-rw-r--r--net/sched/sch_fifo.c2
-rw-r--r--net/sched/sch_fq.c14
-rw-r--r--net/sched/sch_fq_codel.c24
-rw-r--r--net/sched/sch_generic.c96
-rw-r--r--net/sched/sch_gred.c4
-rw-r--r--net/sched/sch_hfsc.c32
-rw-r--r--net/sched/sch_hhf.c8
-rw-r--r--net/sched/sch_htb.c48
-rw-r--r--net/sched/sch_ingress.c10
-rw-r--r--net/sched/sch_mq.c6
-rw-r--r--net/sched/sch_mqprio.c20
-rw-r--r--net/sched/sch_multiq.c17
-rw-r--r--net/sched/sch_netem.c15
-rw-r--r--net/sched/sch_pie.c2
-rw-r--r--net/sched/sch_prio.c20
-rw-r--r--net/sched/sch_qfq.c25
-rw-r--r--net/sched/sch_red.c8
-rw-r--r--net/sched/sch_sfb.c25
-rw-r--r--net/sched/sch_sfq.c35
-rw-r--r--net/sched/sch_tbf.c17
-rw-r--r--net/sched/sch_teql.c24
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/associola.c12
-rw-r--r--net/sctp/command.c68
-rw-r--r--net/sctp/input.c13
-rw-r--r--net/sctp/ipv6.c156
-rw-r--r--net/sctp/output.c73
-rw-r--r--net/sctp/outqueue.c5
-rw-r--r--net/sctp/protocol.c16
-rw-r--r--net/sctp/sm_statefuns.c23
-rw-r--r--net/sctp/socket.c329
-rw-r--r--net/sctp/sysctl.c5
-rw-r--r--net/sctp/transport.c21
-rw-r--r--net/sctp/ulpevent.c65
-rw-r--r--net/socket.c63
-rw-r--r--net/sunrpc/addr.c16
-rw-r--r--net/sunrpc/auth.c68
-rw-r--r--net/sunrpc/auth_generic.c6
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c126
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c9
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c28
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c20
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/auth_null.c2
-rw-r--r--net/sunrpc/clnt.c10
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/sched.c6
-rw-r--r--net/sunrpc/svc.c6
-rw-r--r--net/sunrpc/svc_xprt.c108
-rw-r--r--net/sunrpc/svcsock.c75
-rw-r--r--net/sunrpc/xdr.c3
-rw-r--r--net/sunrpc/xprt.c3
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c83
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c28
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c39
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c22
-rw-r--r--net/sunrpc/xprtrdma/transport.c19
-rw-r--r--net/sunrpc/xprtrdma/verbs.c739
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h68
-rw-r--r--net/sunrpc/xprtsock.c130
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/bcast.c107
-rw-r--r--net/tipc/bcast.h7
-rw-r--r--net/tipc/config.c4
-rw-r--r--net/tipc/core.c9
-rw-r--r--net/tipc/core.h6
-rw-r--r--net/tipc/link.c882
-rw-r--r--net/tipc/link.h14
-rw-r--r--net/tipc/msg.c405
-rw-r--r--net/tipc/msg.h40
-rw-r--r--net/tipc/name_distr.c216
-rw-r--r--net/tipc/name_distr.h3
-rw-r--r--net/tipc/name_table.c9
-rw-r--r--net/tipc/net.c66
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/node.c133
-rw-r--r--net/tipc/node.h25
-rw-r--r--net/tipc/node_subscr.c6
-rw-r--r--net/tipc/port.c898
-rw-r--r--net/tipc/port.h237
-rw-r--r--net/tipc/ref.c266
-rw-r--r--net/tipc/ref.h48
-rw-r--r--net/tipc/socket.c1383
-rw-r--r--net/tipc/socket.h41
-rw-r--r--net/tipc/subscr.c1
-rw-r--r--net/tipc/sysctl.c7
-rw-r--r--net/unix/garbage.c2
-rw-r--r--net/wimax/id-table.c2
-rw-r--r--net/wimax/op-msg.c9
-rw-r--r--net/wimax/op-reset.c3
-rw-r--r--net/wimax/op-rfkill.c3
-rw-r--r--net/wimax/op-state-get.c3
-rw-r--r--net/wimax/stack.c7
-rw-r--r--net/wimax/wimax-internal.h6
-rw-r--r--net/wireless/Kconfig6
-rw-r--r--net/wireless/chan.c1
-rw-r--r--net/wireless/core.c19
-rw-r--r--net/wireless/ethtool.c86
-rw-r--r--net/wireless/ethtool.h6
-rw-r--r--net/wireless/genregdb.awk35
-rw-r--r--net/wireless/ibss.c4
-rw-r--r--net/wireless/mlme.c8
-rw-r--r--net/wireless/nl80211.c273
-rw-r--r--net/wireless/nl80211.h3
-rw-r--r--net/wireless/rdev-ops.h85
-rw-r--r--net/wireless/reg.c82
-rw-r--r--net/wireless/scan.c22
-rw-r--r--net/wireless/sme.c6
-rw-r--r--net/wireless/trace.h104
-rw-r--r--net/wireless/util.c3
-rw-r--r--net/wireless/wext-compat.c2
-rw-r--r--net/wireless/wext-sme.c2
-rw-r--r--net/xfrm/xfrm_hash.h76
-rw-r--r--net/xfrm/xfrm_output.c6
-rw-r--r--net/xfrm/xfrm_policy.c196
-rw-r--r--net/xfrm/xfrm_state.c13
-rw-r--r--net/xfrm/xfrm_user.c83
646 files changed, 31189 insertions, 20935 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig
new file mode 100644
index 000000000000..e4a02ef55102
--- /dev/null
+++ b/net/6lowpan/Kconfig
@@ -0,0 +1,6 @@
+config 6LOWPAN
+ tristate "6LoWPAN Support"
+ depends on IPV6
+ ---help---
+ This enables IPv6 over Low power Wireless Personal Area Network -
+ "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks.
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
new file mode 100644
index 000000000000..415886bb456a
--- /dev/null
+++ b/net/6lowpan/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_6LOWPAN) := 6lowpan.o
+
+6lowpan-y := iphc.o
diff --git a/net/ieee802154/6lowpan_iphc.c b/net/6lowpan/iphc.c
index 211b5686d719..142eef55c9e2 100644
--- a/net/ieee802154/6lowpan_iphc.c
+++ b/net/6lowpan/iphc.c
@@ -3,8 +3,7 @@
* written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
*/
-/*
- * Based on patches from Jon Smirl <jonsmirl@gmail.com>
+/* Based on patches from Jon Smirl <jonsmirl@gmail.com>
* Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -58,16 +57,15 @@
#include <net/ipv6.h>
#include <net/af_ieee802154.h>
-/*
- * Uncompress address function for source and
+/* Uncompress address function for source and
* destination address(non-multicast).
*
* address_mode is sam value or dam value.
*/
static int uncompress_addr(struct sk_buff *skb,
- struct in6_addr *ipaddr, const u8 address_mode,
- const u8 *lladdr, const u8 addr_type,
- const u8 addr_len)
+ struct in6_addr *ipaddr, const u8 address_mode,
+ const u8 *lladdr, const u8 addr_type,
+ const u8 addr_len)
{
bool fail;
@@ -140,13 +138,12 @@ static int uncompress_addr(struct sk_buff *skb,
return 0;
}
-/*
- * Uncompress address function for source context
+/* Uncompress address function for source context
* based address(non-multicast).
*/
static int uncompress_context_based_src_addr(struct sk_buff *skb,
- struct in6_addr *ipaddr,
- const u8 sam)
+ struct in6_addr *ipaddr,
+ const u8 sam)
{
switch (sam) {
case LOWPAN_IPHC_ADDR_00:
@@ -175,13 +172,13 @@ static int uncompress_context_based_src_addr(struct sk_buff *skb,
}
static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr,
- struct net_device *dev, skb_delivery_cb deliver_skb)
+ struct net_device *dev, skb_delivery_cb deliver_skb)
{
struct sk_buff *new;
int stat;
new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb),
- GFP_ATOMIC);
+ GFP_ATOMIC);
kfree_skb(skb);
if (!new)
@@ -196,7 +193,7 @@ static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr,
new->dev = dev;
raw_dump_table(__func__, "raw skb data dump before receiving",
- new->data, new->len);
+ new->data, new->len);
stat = deliver_skb(new, dev);
@@ -208,10 +205,9 @@ static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr,
/* Uncompress function for multicast destination address,
* when M bit is set.
*/
-static int
-lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
- struct in6_addr *ipaddr,
- const u8 dam)
+static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 dam)
{
bool fail;
@@ -257,41 +253,41 @@ lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
}
raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is",
- ipaddr->s6_addr, 16);
+ ipaddr->s6_addr, 16);
return 0;
}
-static int
-uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
+static int uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
{
bool fail;
u8 tmp = 0, val = 0;
- if (!uh)
- goto err;
-
- fail = lowpan_fetch_skb(skb, &tmp, 1);
+ fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp));
if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) {
pr_debug("UDP header uncompression\n");
switch (tmp & LOWPAN_NHC_UDP_CS_P_11) {
case LOWPAN_NHC_UDP_CS_P_00:
- fail |= lowpan_fetch_skb(skb, &uh->source, 2);
- fail |= lowpan_fetch_skb(skb, &uh->dest, 2);
+ fail |= lowpan_fetch_skb(skb, &uh->source,
+ sizeof(uh->source));
+ fail |= lowpan_fetch_skb(skb, &uh->dest,
+ sizeof(uh->dest));
break;
case LOWPAN_NHC_UDP_CS_P_01:
- fail |= lowpan_fetch_skb(skb, &uh->source, 2);
- fail |= lowpan_fetch_skb(skb, &val, 1);
+ fail |= lowpan_fetch_skb(skb, &uh->source,
+ sizeof(uh->source));
+ fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
uh->dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT);
break;
case LOWPAN_NHC_UDP_CS_P_10:
- fail |= lowpan_fetch_skb(skb, &val, 1);
+ fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
uh->source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT);
- fail |= lowpan_fetch_skb(skb, &uh->dest, 2);
+ fail |= lowpan_fetch_skb(skb, &uh->dest,
+ sizeof(uh->dest));
break;
case LOWPAN_NHC_UDP_CS_P_11:
- fail |= lowpan_fetch_skb(skb, &val, 1);
+ fail |= lowpan_fetch_skb(skb, &val, sizeof(val));
uh->source = htons(LOWPAN_NHC_UDP_4BIT_PORT +
(val >> 4));
uh->dest = htons(LOWPAN_NHC_UDP_4BIT_PORT +
@@ -300,7 +296,6 @@ uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
default:
pr_debug("ERROR: unknown UDP format\n");
goto err;
- break;
}
pr_debug("uncompressed UDP ports: src = %d, dst = %d\n",
@@ -311,11 +306,11 @@ uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
pr_debug_ratelimited("checksum elided currently not supported\n");
goto err;
} else {
- fail |= lowpan_fetch_skb(skb, &uh->check, 2);
+ fail |= lowpan_fetch_skb(skb, &uh->check,
+ sizeof(uh->check));
}
- /*
- * UDP lenght needs to be infered from the lower layers
+ /* UDP length needs to be infered from the lower layers
* here, we obtain the hint from the remaining size of the
* frame
*/
@@ -338,21 +333,21 @@ err:
static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 };
int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
- const u8 *saddr, const u8 saddr_type, const u8 saddr_len,
- const u8 *daddr, const u8 daddr_type, const u8 daddr_len,
- u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb)
+ const u8 *saddr, const u8 saddr_type, const u8 saddr_len,
+ const u8 *daddr, const u8 daddr_type, const u8 daddr_len,
+ u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb)
{
struct ipv6hdr hdr = {};
u8 tmp, num_context = 0;
int err;
raw_dump_table(__func__, "raw skb data dump uncompressed",
- skb->data, skb->len);
+ skb->data, skb->len);
/* another if the CID flag is set */
if (iphc1 & LOWPAN_IPHC_CID) {
pr_debug("CID flag is set, increase header with one\n");
- if (lowpan_fetch_skb_u8(skb, &num_context))
+ if (lowpan_fetch_skb(skb, &num_context, sizeof(num_context)))
goto drop;
}
@@ -360,12 +355,11 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
/* Traffic Class and Flow Label */
switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) {
- /*
- * Traffic Class and FLow Label carried in-line
+ /* Traffic Class and FLow Label carried in-line
* ECN + DSCP + 4-bit Pad + Flow Label (4 bytes)
*/
case 0: /* 00b */
- if (lowpan_fetch_skb_u8(skb, &tmp))
+ if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
goto drop;
memcpy(&hdr.flow_lbl, &skb->data[0], 3);
@@ -374,23 +368,21 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) |
(hdr.flow_lbl[0] & 0x0f);
break;
- /*
- * Traffic class carried in-line
+ /* Traffic class carried in-line
* ECN + DSCP (1 byte), Flow Label is elided
*/
case 2: /* 10b */
- if (lowpan_fetch_skb_u8(skb, &tmp))
+ if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
goto drop;
hdr.priority = ((tmp >> 2) & 0x0f);
hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
break;
- /*
- * Flow Label carried in-line
+ /* Flow Label carried in-line
* ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
*/
case 1: /* 01b */
- if (lowpan_fetch_skb_u8(skb, &tmp))
+ if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
goto drop;
hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30);
@@ -407,7 +399,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
/* Next Header */
if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
/* Next header is carried inline */
- if (lowpan_fetch_skb_u8(skb, &(hdr.nexthdr)))
+ if (lowpan_fetch_skb(skb, &hdr.nexthdr, sizeof(hdr.nexthdr)))
goto drop;
pr_debug("NH flag is set, next header carried inline: %02x\n",
@@ -415,10 +407,11 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
}
/* Hop Limit */
- if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I)
+ if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) {
hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03];
- else {
- if (lowpan_fetch_skb_u8(skb, &(hdr.hop_limit)))
+ } else {
+ if (lowpan_fetch_skb(skb, &hdr.hop_limit,
+ sizeof(hdr.hop_limit)))
goto drop;
}
@@ -428,13 +421,12 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
if (iphc1 & LOWPAN_IPHC_SAC) {
/* Source address context based uncompression */
pr_debug("SAC bit is set. Handle context based source address.\n");
- err = uncompress_context_based_src_addr(
- skb, &hdr.saddr, tmp);
+ err = uncompress_context_based_src_addr(skb, &hdr.saddr, tmp);
} else {
/* Source address uncompression */
pr_debug("source address stateless compression\n");
err = uncompress_addr(skb, &hdr.saddr, tmp, saddr,
- saddr_type, saddr_len);
+ saddr_type, saddr_len);
}
/* Check on error of previous branch */
@@ -450,16 +442,17 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
pr_debug("dest: context-based mcast compression\n");
/* TODO: implement this */
} else {
- err = lowpan_uncompress_multicast_daddr(
- skb, &hdr.daddr, tmp);
+ err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr,
+ tmp);
+
if (err)
goto drop;
}
} else {
err = uncompress_addr(skb, &hdr.daddr, tmp, daddr,
- daddr_type, daddr_len);
+ daddr_type, daddr_len);
pr_debug("dest: stateless compression mode %d dest %pI6c\n",
- tmp, &hdr.daddr);
+ tmp, &hdr.daddr);
if (err)
goto drop;
}
@@ -468,11 +461,11 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
if (iphc0 & LOWPAN_IPHC_NH_C) {
struct udphdr uh;
struct sk_buff *new;
+
if (uncompress_udp_header(skb, &uh))
goto drop;
- /*
- * replace the compressed UDP head by the uncompressed UDP
+ /* replace the compressed UDP head by the uncompressed UDP
* header
*/
new = skb_copy_expand(skb, sizeof(struct udphdr),
@@ -489,7 +482,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr));
raw_dump_table(__func__, "raw UDP header dump",
- (u8 *)&uh, sizeof(uh));
+ (u8 *)&uh, sizeof(uh));
hdr.nexthdr = UIP_PROTO_UDP;
}
@@ -504,8 +497,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
hdr.version, ntohs(hdr.payload_len), hdr.nexthdr,
hdr.hop_limit, &hdr.daddr);
- raw_dump_table(__func__, "raw header dump", (u8 *)&hdr,
- sizeof(hdr));
+ raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, sizeof(hdr));
return skb_deliver(skb, &hdr, dev, deliver_skb);
@@ -515,9 +507,9 @@ drop:
}
EXPORT_SYMBOL_GPL(lowpan_process_data);
-static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift,
- const struct in6_addr *ipaddr,
- const unsigned char *lladdr)
+static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift,
+ const struct in6_addr *ipaddr,
+ const unsigned char *lladdr)
{
u8 val = 0;
@@ -526,24 +518,22 @@ static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift,
pr_debug("address compression 0 bits\n");
} else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
/* compress IID to 16 bits xxxx::XXXX */
- memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2);
- *hc06_ptr += 2;
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2);
val = 2; /* 16-bits */
raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)",
- *hc06_ptr - 2, 2);
+ *hc_ptr - 2, 2);
} else {
/* do not compress IID => xxxx::IID */
- memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8);
- *hc06_ptr += 8;
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
val = 1; /* 64-bits */
raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
- *hc06_ptr - 8, 8);
+ *hc_ptr - 8, 8);
}
return rol8(val, shift);
}
-static void compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb)
+static void compress_udp_header(u8 **hc_ptr, struct sk_buff *skb)
{
struct udphdr *uh = udp_hdr(skb);
u8 tmp;
@@ -555,75 +545,75 @@ static void compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb)
pr_debug("UDP header: both ports compression to 4 bits\n");
/* compression value */
tmp = LOWPAN_NHC_UDP_CS_P_11;
- lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp));
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
/* source and destination port */
tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT +
((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4);
- lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp));
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
} else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) ==
LOWPAN_NHC_UDP_8BIT_PORT) {
pr_debug("UDP header: remove 8 bits of dest\n");
/* compression value */
tmp = LOWPAN_NHC_UDP_CS_P_01;
- lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp));
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
/* source port */
- lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source));
+ lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source));
/* destination port */
tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT;
- lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp));
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
} else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) ==
LOWPAN_NHC_UDP_8BIT_PORT) {
pr_debug("UDP header: remove 8 bits of source\n");
/* compression value */
tmp = LOWPAN_NHC_UDP_CS_P_10;
- lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp));
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
/* source port */
tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT;
- lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp));
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
/* destination port */
- lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest));
+ lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest));
} else {
pr_debug("UDP header: can't compress\n");
/* compression value */
tmp = LOWPAN_NHC_UDP_CS_P_00;
- lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp));
+ lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp));
/* source port */
- lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source));
+ lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source));
/* destination port */
- lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest));
+ lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest));
}
/* checksum is always inline */
- lowpan_push_hc_data(hc06_ptr, &uh->check, sizeof(uh->check));
+ lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check));
/* skip the UDP header */
skb_pull(skb, sizeof(struct udphdr));
}
int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len)
+ unsigned short type, const void *_daddr,
+ const void *_saddr, unsigned int len)
{
- u8 tmp, iphc0, iphc1, *hc06_ptr;
+ u8 tmp, iphc0, iphc1, *hc_ptr;
struct ipv6hdr *hdr;
u8 head[100] = {};
+ int addr_type;
if (type != ETH_P_IPV6)
return -EINVAL;
hdr = ipv6_hdr(skb);
- hc06_ptr = head + 2;
+ hc_ptr = head + 2;
pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n"
"\tnexthdr = 0x%02x\n\thop_lim = %d\n\tdest = %pI6c\n",
- hdr->version, ntohs(hdr->payload_len), hdr->nexthdr,
- hdr->hop_limit, &hdr->daddr);
+ hdr->version, ntohs(hdr->payload_len), hdr->nexthdr,
+ hdr->hop_limit, &hdr->daddr);
raw_dump_table(__func__, "raw skb network header dump",
- skb_network_header(skb), sizeof(struct ipv6hdr));
+ skb_network_header(skb), sizeof(struct ipv6hdr));
- /*
- * As we copy some bit-length fields, in the IPHC encoding bytes,
+ /* As we copy some bit-length fields, in the IPHC encoding bytes,
* we sometimes use |=
* If the field is 0, and the current bit value in memory is 1,
* this does not work. We therefore reset the IPHC encoding here
@@ -638,49 +628,47 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
raw_dump_inline(__func__, "daddr",
(unsigned char *)_daddr, IEEE802154_ADDR_LEN);
- raw_dump_table(__func__,
- "sending raw skb network uncompressed packet",
- skb->data, skb->len);
+ raw_dump_table(__func__, "sending raw skb network uncompressed packet",
+ skb->data, skb->len);
- /*
- * Traffic class, flow label
+ /* Traffic class, flow label
* If flow label is 0, compress it. If traffic class is 0, compress it
* We have to process both in the same time as the offset of traffic
* class depends on the presence of version and flow label
*/
- /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */
+ /* hc format of TC is ECN | DSCP , original one is DSCP | ECN */
tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4);
tmp = ((tmp & 0x03) << 6) | (tmp >> 2);
if (((hdr->flow_lbl[0] & 0x0F) == 0) &&
- (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) {
+ (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) {
/* flow label can be compressed */
iphc0 |= LOWPAN_IPHC_FL_C;
if ((hdr->priority == 0) &&
- ((hdr->flow_lbl[0] & 0xF0) == 0)) {
+ ((hdr->flow_lbl[0] & 0xF0) == 0)) {
/* compress (elide) all */
iphc0 |= LOWPAN_IPHC_TC_C;
} else {
/* compress only the flow label */
- *hc06_ptr = tmp;
- hc06_ptr += 1;
+ *hc_ptr = tmp;
+ hc_ptr += 1;
}
} else {
/* Flow label cannot be compressed */
if ((hdr->priority == 0) &&
- ((hdr->flow_lbl[0] & 0xF0) == 0)) {
+ ((hdr->flow_lbl[0] & 0xF0) == 0)) {
/* compress only traffic class */
iphc0 |= LOWPAN_IPHC_TC_C;
- *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F);
- memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2);
- hc06_ptr += 3;
+ *hc_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F);
+ memcpy(hc_ptr + 1, &hdr->flow_lbl[1], 2);
+ hc_ptr += 3;
} else {
/* compress nothing */
- memcpy(hc06_ptr, hdr, 4);
+ memcpy(hc_ptr, hdr, 4);
/* replace the top byte with new ECN | DSCP format */
- *hc06_ptr = tmp;
- hc06_ptr += 4;
+ *hc_ptr = tmp;
+ hc_ptr += 4;
}
}
@@ -690,13 +678,11 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
if (hdr->nexthdr == UIP_PROTO_UDP)
iphc0 |= LOWPAN_IPHC_NH_C;
- if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
- *hc06_ptr = hdr->nexthdr;
- hc06_ptr += 1;
- }
+ if ((iphc0 & LOWPAN_IPHC_NH_C) == 0)
+ lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr,
+ sizeof(hdr->nexthdr));
- /*
- * Hop limit
+ /* Hop limit
* if 1: compress, encoding is 01
* if 64: compress, encoding is 10
* if 255: compress, encoding is 11
@@ -713,87 +699,89 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
iphc0 |= LOWPAN_IPHC_TTL_255;
break;
default:
- *hc06_ptr = hdr->hop_limit;
- hc06_ptr += 1;
- break;
+ lowpan_push_hc_data(&hc_ptr, &hdr->hop_limit,
+ sizeof(hdr->hop_limit));
}
+ addr_type = ipv6_addr_type(&hdr->saddr);
/* source address compression */
- if (is_addr_unspecified(&hdr->saddr)) {
+ if (addr_type == IPV6_ADDR_ANY) {
pr_debug("source address is unspecified, setting SAC\n");
iphc1 |= LOWPAN_IPHC_SAC;
- /* TODO: context lookup */
- } else if (is_addr_link_local(&hdr->saddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc06_ptr,
- LOWPAN_IPHC_SAM_BIT, &hdr->saddr, _saddr);
- pr_debug("source address unicast link-local %pI6c "
- "iphc1 0x%02x\n", &hdr->saddr, iphc1);
} else {
- pr_debug("send the full source address\n");
- memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16);
- hc06_ptr += 16;
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr,
+ LOWPAN_IPHC_SAM_BIT,
+ &hdr->saddr, _saddr);
+ pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
+ &hdr->saddr, iphc1);
+ } else {
+ pr_debug("send the full source address\n");
+ lowpan_push_hc_data(&hc_ptr, hdr->saddr.s6_addr, 16);
+ }
}
+ addr_type = ipv6_addr_type(&hdr->daddr);
/* destination address compression */
- if (is_addr_mcast(&hdr->daddr)) {
+ if (addr_type & IPV6_ADDR_MULTICAST) {
pr_debug("destination address is multicast: ");
iphc1 |= LOWPAN_IPHC_M;
if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) {
pr_debug("compressed to 1 octet\n");
iphc1 |= LOWPAN_IPHC_DAM_11;
/* use last byte */
- *hc06_ptr = hdr->daddr.s6_addr[15];
- hc06_ptr += 1;
+ lowpan_push_hc_data(&hc_ptr,
+ &hdr->daddr.s6_addr[15], 1);
} else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) {
pr_debug("compressed to 4 octets\n");
iphc1 |= LOWPAN_IPHC_DAM_10;
/* second byte + the last three */
- *hc06_ptr = hdr->daddr.s6_addr[1];
- memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3);
- hc06_ptr += 4;
+ lowpan_push_hc_data(&hc_ptr,
+ &hdr->daddr.s6_addr[1], 1);
+ lowpan_push_hc_data(&hc_ptr,
+ &hdr->daddr.s6_addr[13], 3);
} else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) {
pr_debug("compressed to 6 octets\n");
iphc1 |= LOWPAN_IPHC_DAM_01;
/* second byte + the last five */
- *hc06_ptr = hdr->daddr.s6_addr[1];
- memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5);
- hc06_ptr += 6;
+ lowpan_push_hc_data(&hc_ptr,
+ &hdr->daddr.s6_addr[1], 1);
+ lowpan_push_hc_data(&hc_ptr,
+ &hdr->daddr.s6_addr[11], 5);
} else {
pr_debug("using full address\n");
iphc1 |= LOWPAN_IPHC_DAM_00;
- memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16);
- hc06_ptr += 16;
+ lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16);
}
} else {
- /* TODO: context lookup */
- if (is_addr_link_local(&hdr->daddr)) {
- iphc1 |= lowpan_compress_addr_64(&hc06_ptr,
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ /* TODO: context lookup */
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr,
LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr);
pr_debug("dest address unicast link-local %pI6c "
- "iphc1 0x%02x\n", &hdr->daddr, iphc1);
+ "iphc1 0x%02x\n", &hdr->daddr, iphc1);
} else {
pr_debug("dest address unicast %pI6c\n", &hdr->daddr);
- memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16);
- hc06_ptr += 16;
+ lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16);
}
}
/* UDP header compression */
if (hdr->nexthdr == UIP_PROTO_UDP)
- compress_udp_header(&hc06_ptr, skb);
+ compress_udp_header(&hc_ptr, skb);
head[0] = iphc0;
head[1] = iphc1;
skb_pull(skb, sizeof(struct ipv6hdr));
skb_reset_transport_header(skb);
- memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head);
+ memcpy(skb_push(skb, hc_ptr - head), head, hc_ptr - head);
skb_reset_network_header(skb);
- pr_debug("header len %d skb %u\n", (int)(hc06_ptr - head), skb->len);
+ pr_debug("header len %d skb %u\n", (int)(hc_ptr - head), skb->len);
raw_dump_table(__func__, "raw skb data dump compressed",
- skb->data, skb->len);
+ skb->data, skb->len);
return 0;
}
EXPORT_SYMBOL_GPL(lowpan_header_compress);
diff --git a/net/802/fc.c b/net/802/fc.c
index 05eea6b98bb8..7c174b6750cd 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -126,6 +126,6 @@ static void fc_setup(struct net_device *dev)
*/
struct net_device *alloc_fcdev(int sizeof_priv)
{
- return alloc_netdev(sizeof_priv, "fc%d", fc_setup);
+ return alloc_netdev(sizeof_priv, "fc%d", NET_NAME_UNKNOWN, fc_setup);
}
EXPORT_SYMBOL(alloc_fcdev);
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 9cda40661e0d..59e7346f1193 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -207,7 +207,8 @@ static void fddi_setup(struct net_device *dev)
*/
struct net_device *alloc_fddidev(int sizeof_priv)
{
- return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup);
+ return alloc_netdev(sizeof_priv, "fddi%d", NET_NAME_UNKNOWN,
+ fddi_setup);
}
EXPORT_SYMBOL(alloc_fddidev);
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 5ff2a718ddca..2e03f8259dd5 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -228,7 +228,8 @@ static void hippi_setup(struct net_device *dev)
struct net_device *alloc_hippi_dev(int sizeof_priv)
{
- return alloc_netdev(sizeof_priv, "hip%d", hippi_setup);
+ return alloc_netdev(sizeof_priv, "hip%d", NET_NAME_UNKNOWN,
+ hippi_setup);
}
EXPORT_SYMBOL(alloc_hippi_dev);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 44ebd5c2cd4a..64c6bed4a3d3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -250,7 +250,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
}
- new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup);
+ new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name,
+ NET_NAME_UNKNOWN, vlan_setup);
if (new_dev == NULL)
return -ENOBUFS;
@@ -324,23 +325,24 @@ static void vlan_transfer_features(struct net_device *dev,
netdev_update_features(vlandev);
}
-static void __vlan_device_event(struct net_device *dev, unsigned long event)
+static int __vlan_device_event(struct net_device *dev, unsigned long event)
{
+ int err = 0;
+
switch (event) {
case NETDEV_CHANGENAME:
vlan_proc_rem_dev(dev);
- if (vlan_proc_add_dev(dev) < 0)
- pr_warn("failed to change proc name for %s\n",
- dev->name);
+ err = vlan_proc_add_dev(dev);
break;
case NETDEV_REGISTER:
- if (vlan_proc_add_dev(dev) < 0)
- pr_warn("failed to add proc entry for %s\n", dev->name);
+ err = vlan_proc_add_dev(dev);
break;
case NETDEV_UNREGISTER:
vlan_proc_rem_dev(dev);
break;
}
+
+ return err;
}
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
@@ -355,8 +357,12 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
bool last = false;
LIST_HEAD(list);
- if (is_vlan_dev(dev))
- __vlan_device_event(dev, event);
+ if (is_vlan_dev(dev)) {
+ int err = __vlan_device_event(dev, event);
+
+ if (err)
+ return notifier_from_errno(err);
+ }
if ((event == NETDEV_UP) &&
(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 75d427763992..90cc2bdd4064 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -112,59 +112,6 @@ __be16 vlan_dev_vlan_proto(const struct net_device *dev)
}
EXPORT_SYMBOL(vlan_dev_vlan_proto);
-static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
-{
- if (skb_cow(skb, skb_headroom(skb)) < 0) {
- kfree_skb(skb);
- return NULL;
- }
-
- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
- skb->mac_header += VLAN_HLEN;
- return skb;
-}
-
-struct sk_buff *vlan_untag(struct sk_buff *skb)
-{
- struct vlan_hdr *vhdr;
- u16 vlan_tci;
-
- if (unlikely(vlan_tx_tag_present(skb))) {
- /* vlan_tci is already set-up so leave this for another time */
- return skb;
- }
-
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(!skb))
- goto err_free;
-
- if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
- goto err_free;
-
- vhdr = (struct vlan_hdr *) skb->data;
- vlan_tci = ntohs(vhdr->h_vlan_TCI);
- __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
-
- skb_pull_rcsum(skb, VLAN_HLEN);
- vlan_set_encap_proto(skb, vhdr);
-
- skb = vlan_reorder_header(skb);
- if (unlikely(!skb))
- goto err_free;
-
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- skb_reset_mac_len(skb);
-
- return skb;
-
-err_free:
- kfree_skb(skb);
- return NULL;
-}
-EXPORT_SYMBOL(vlan_untag);
-
-
/*
* vlan info and vid list
*/
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index dd11f612e03e..0d441ec8763e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -385,6 +385,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
+ case SIOCSHWTSTAMP:
+ case SIOCGHWTSTAMP:
if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
break;
@@ -797,7 +799,8 @@ void vlan_setup(struct net_device *dev)
ether_setup(dev);
dev->priv_flags |= IFF_802_1Q_VLAN;
- dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ netif_keep_dst(dev);
dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 1d0e89213a28..ae63cf72a953 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -171,6 +171,8 @@ int vlan_proc_add_dev(struct net_device *vlandev)
struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
+ if (!strcmp(vlandev->name, name_conf))
+ return -EINVAL;
vlan->dent =
proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
vn->proc_vlan_dir, &vlandev_fops, vlandev);
diff --git a/net/9p/client.c b/net/9p/client.c
index 0004cbaac4a4..e86a9bea1d16 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -959,7 +959,6 @@ static int p9_client_version(struct p9_client *c)
break;
default:
return -EINVAL;
- break;
}
if (IS_ERR(req))
diff --git a/net/Kconfig b/net/Kconfig
index d92afe4204d9..6272420a721b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -6,6 +6,7 @@ menuconfig NET
bool "Networking support"
select NLATTR
select GENERIC_NET_UTILS
+ select ANON_INODES
---help---
Unless you really know what you are doing, you should say Y here.
The reason is that some programs need kernel networking support even
@@ -176,10 +177,11 @@ config NETFILTER_ADVANCED
If unsure, say Y.
config BRIDGE_NETFILTER
- bool "Bridged IP/ARP packets filtering"
- depends on BRIDGE && NETFILTER && INET
+ tristate "Bridged IP/ARP packets filtering"
+ depends on BRIDGE
+ depends on NETFILTER && INET
depends on NETFILTER_ADVANCED
- default y
+ default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
@@ -214,6 +216,7 @@ source "drivers/net/appletalk/Kconfig"
source "net/x25/Kconfig"
source "net/lapb/Kconfig"
source "net/phonet/Kconfig"
+source "net/6lowpan/Kconfig"
source "net/ieee802154/Kconfig"
source "net/mac802154/Kconfig"
source "net/sched/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index cbbbe6d657ca..7ed1970074b0 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -57,7 +57,8 @@ obj-$(CONFIG_CAIF) += caif/
ifneq ($(CONFIG_DCB),)
obj-y += dcb/
endif
-obj-y += ieee802154/
+obj-$(CONFIG_6LOWPAN) += 6lowpan/
+obj-$(CONFIG_IEEE802154) += ieee802154/
obj-$(CONFIG_MAC802154) += mac802154/
ifeq ($(CONFIG_NET),y)
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index bfcf6be1d665..c00897f65a31 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1805,7 +1805,7 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
long amount = 0;
if (skb)
- amount = skb->len - sizeof(struct ddpehdr);
+ amount = skb->len - sizeof(struct ddpehdr);
rc = put_user(amount, (int __user *)argp);
break;
}
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index 6c8016f61866..e4158b8b926d 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -39,6 +39,7 @@ static void ltalk_setup(struct net_device *dev)
struct net_device *alloc_ltalkdev(int sizeof_priv)
{
- return alloc_netdev(sizeof_priv, "lt%d", ltalk_setup);
+ return alloc_netdev(sizeof_priv, "lt%d", NET_NAME_UNKNOWN,
+ ltalk_setup);
}
EXPORT_SYMBOL(alloc_ltalkdev);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 403e71fa88fe..cc78538d163b 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -682,8 +682,8 @@ static int br2684_create(void __user *arg)
netdev = alloc_netdev(sizeof(struct br2684_dev),
ni.ifname[0] ? ni.ifname : "nas%d",
- (payload == p_routed) ?
- br2684_setup_routed : br2684_setup);
+ NET_NAME_UNKNOWN,
+ (payload == p_routed) ? br2684_setup_routed : br2684_setup);
if (!netdev)
return -ENOMEM;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ba291ce4bdff..17e55dfecbe2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -384,7 +384,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */
if (old) {
- pr_warning("XOFF->XOFF transition\n");
+ pr_warn("XOFF->XOFF transition\n");
goto out_release_neigh;
}
dev->stats.tx_packets++;
@@ -447,7 +447,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
struct rtable *rt;
if (vcc->push != clip_push) {
- pr_warning("non-CLIP VCC\n");
+ pr_warn("non-CLIP VCC\n");
return -EBADF;
}
clip_vcc = CLIP_VCC(vcc);
@@ -501,7 +501,7 @@ static void clip_setup(struct net_device *dev)
/* without any more elaborate queuing. 100 is a reasonable */
/* compromise between decent burst-tolerance and protection */
/* against memory hogs. */
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
}
static int clip_create(int number)
@@ -520,7 +520,8 @@ static int clip_create(int number)
if (PRIV(dev)->number >= number)
number = PRIV(dev)->number + 1;
}
- dev = alloc_netdev(sizeof(struct clip_priv), "", clip_setup);
+ dev = alloc_netdev(sizeof(struct clip_priv), "", NET_NAME_UNKNOWN,
+ clip_setup);
if (!dev)
return -ENOMEM;
clip_priv = PRIV(dev);
diff --git a/net/atm/common.c b/net/atm/common.c
index 7b491006eaf4..6a765156a3f6 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -300,7 +300,7 @@ static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
max_sdu = ATM_MAX_AAL34_PDU;
break;
default:
- pr_warning("AAL problems ... (%d)\n", aal);
+ pr_warn("AAL problems ... (%d)\n", aal);
/* fall through */
case ATM_AAL5:
max_sdu = ATM_MAX_AAL5_PDU;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 4c5b8ba0f84f..4b98f897044a 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -410,9 +410,11 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
priv->lane2_ops = NULL;
if (priv->lane_version > 1)
priv->lane2_ops = &lane2_ops;
+ rtnl_lock();
if (dev_set_mtu(dev, mesg->content.config.mtu))
pr_info("%s: change_mtu to %d failed\n",
dev->name, mesg->content.config.mtu);
+ rtnl_unlock();
priv->is_proxy = mesg->content.config.is_proxy;
break;
case l_flush_tran_id:
@@ -833,7 +835,6 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
loff_t *l)
{
struct hlist_node *e = state->node;
- struct lec_arp_table *tmp;
if (!e)
e = tbl->first;
@@ -842,9 +843,7 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
--*l;
}
- tmp = container_of(e, struct lec_arp_table, next);
-
- hlist_for_each_entry_from(tmp, next) {
+ for (; e; e = e->next) {
if (--*l < 0)
break;
}
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index e8e0e7a8a23d..0e982222d425 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
}
non_ip:
- return mpc->old_ops->ndo_start_xmit(skb, dev);
+ return __netdev_start_xmit(mpc->old_ops, skb, dev, false);
}
static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
diff --git a/net/atm/svc.c b/net/atm/svc.c
index d8e5d0c2ebbc..1ba23f5018e7 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -50,12 +50,12 @@ static void svc_disconnect(struct atm_vcc *vcc)
pr_debug("%p\n", vcc);
if (test_bit(ATM_VF_REGIS, &vcc->flags)) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq(vcc, as_close, NULL, NULL, NULL);
- while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
+ for (;;) {
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (test_bit(ATM_VF_RELEASED, &vcc->flags) || !sigd)
+ break;
schedule();
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_UNINTERRUPTIBLE);
}
finish_wait(sk_sleep(sk), &wait);
}
@@ -126,11 +126,12 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr,
}
vcc->local = *addr;
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */
@@ -202,15 +203,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
}
vcc->remote = *addr;
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote);
if (flags & O_NONBLOCK) {
- finish_wait(sk_sleep(sk), &wait);
sock->state = SS_CONNECTING;
error = -EINPROGRESS;
goto out;
}
error = 0;
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
schedule();
if (!signal_pending(current)) {
@@ -297,11 +297,12 @@ static int svc_listen(struct socket *sock, int backlog)
goto out;
}
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
if (!sigd) {
@@ -387,15 +388,15 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
}
/* wait should be short, so we ignore the non-blocking flag */
set_bit(ATM_VF_WAITING, &new_vcc->flags);
- prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
- TASK_UNINTERRUPTIBLE);
sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL);
- while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) {
+ for (;;) {
+ prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &new_vcc->flags) || !sigd)
+ break;
release_sock(sk);
schedule();
lock_sock(sk);
- prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
- TASK_UNINTERRUPTIBLE);
}
finish_wait(sk_sleep(sk_atm(new_vcc)), &wait);
if (!sigd) {
@@ -433,12 +434,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
DEFINE_WAIT(wait);
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) &&
- !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) ||
+ test_bit(ATM_VF_RELEASED, &vcc->flags) || !sigd) {
+ break;
+ }
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
if (!sigd)
@@ -529,18 +532,18 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr,
lock_sock(sk);
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sigd_enq(vcc, as_addparty, NULL, NULL,
(struct sockaddr_atmsvc *) sockaddr);
if (flags & O_NONBLOCK) {
- finish_wait(sk_sleep(sk), &wait);
error = -EINPROGRESS;
goto out;
}
pr_debug("added wait queue\n");
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
error = xchg(&sk->sk_err_soft, 0);
@@ -558,11 +561,12 @@ static int svc_dropparty(struct socket *sock, int ep_ref)
lock_sock(sk);
set_bit(ATM_VF_WAITING, &vcc->flags);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref);
- while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
- schedule();
+ for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
+ break;
+ schedule();
}
finish_wait(sk_sleep(sk), &wait);
if (!sigd) {
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f04224c32005..1e8053976e83 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -108,14 +108,15 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
int max_if_num)
{
void *data_ptr;
- size_t data_size, old_size;
+ size_t old_size;
int ret = -ENOMEM;
spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
- data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS;
old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
- data_ptr = kmalloc(data_size, GFP_ATOMIC);
+ data_ptr = kmalloc_array(max_if_num,
+ BATADV_NUM_WORDS * sizeof(unsigned long),
+ GFP_ATOMIC);
if (!data_ptr)
goto unlock;
@@ -123,7 +124,7 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
kfree(orig_node->bat_iv.bcast_own);
orig_node->bat_iv.bcast_own = data_ptr;
- data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+ data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC);
if (!data_ptr) {
kfree(orig_node->bat_iv.bcast_own);
goto unlock;
@@ -164,7 +165,7 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
goto free_bcast_own;
chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
- data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
+ data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC);
if (!data_ptr)
goto unlock;
@@ -183,7 +184,7 @@ free_bcast_own:
if (max_if_num == 0)
goto free_own_sum;
- data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+ data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC);
if (!data_ptr) {
kfree(orig_node->bat_iv.bcast_own);
goto unlock;
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index f2c066b21716..b5981113c9a7 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -537,7 +537,8 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
if (!bat_priv->orig_hash)
return NULL;
- res = kmalloc(BATADV_DAT_CANDIDATES_NUM * sizeof(*res), GFP_ATOMIC);
+ res = kmalloc_array(BATADV_DAT_CANDIDATES_NUM, sizeof(*res),
+ GFP_ATOMIC);
if (!res)
return NULL;
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index f14e54a05691..fc1835c6bb40 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -128,6 +128,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
{
struct batadv_frag_table_entry *chain;
struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr;
+ struct batadv_frag_list_entry *frag_entry_last = NULL;
struct batadv_frag_packet *frag_packet;
uint8_t bucket;
uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet);
@@ -180,11 +181,14 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
ret = true;
goto out;
}
+
+ /* store current entry because it could be the last in list */
+ frag_entry_last = frag_entry_curr;
}
- /* Reached the end of the list, so insert after 'frag_entry_curr'. */
- if (likely(frag_entry_curr)) {
- hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list);
+ /* Reached the end of the list, so insert after 'frag_entry_last'. */
+ if (likely(frag_entry_last)) {
+ hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
chain->size += skb->len - hdr_size;
chain->timestamp = jiffies;
ret = true;
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 63bdf7e94f1e..7c1c63080e20 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -46,12 +46,12 @@ struct batadv_hashtable *batadv_hash_new(uint32_t size)
if (!hash)
return NULL;
- hash->table = kmalloc(sizeof(*hash->table) * size, GFP_ATOMIC);
+ hash->table = kmalloc_array(size, sizeof(*hash->table), GFP_ATOMIC);
if (!hash->table)
goto free_hash;
- hash->list_locks = kmalloc(sizeof(*hash->list_locks) * size,
- GFP_ATOMIC);
+ hash->list_locks = kmalloc_array(size, sizeof(*hash->list_locks),
+ GFP_ATOMIC);
if (!hash->list_locks)
goto free_table;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 118b990bae25..a1fcd884f0b1 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2014.3.0"
+#define BATADV_SOURCE_VERSION "2014.4.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -238,21 +238,29 @@ enum batadv_dbg_level {
int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
__printf(2, 3);
-#define batadv_dbg(type, bat_priv, fmt, arg...) \
+/* possibly ratelimited debug output */
+#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
do { \
- if (atomic_read(&bat_priv->log_level) & type) \
+ if (atomic_read(&bat_priv->log_level) & type && \
+ (!ratelimited || net_ratelimit())) \
batadv_debug_log(bat_priv, fmt, ## arg);\
} \
while (0)
#else /* !CONFIG_BATMAN_ADV_DEBUG */
-__printf(3, 4)
-static inline void batadv_dbg(int type __always_unused,
- struct batadv_priv *bat_priv __always_unused,
- const char *fmt __always_unused, ...)
+__printf(4, 5)
+static inline void _batadv_dbg(int type __always_unused,
+ struct batadv_priv *bat_priv __always_unused,
+ int ratelimited __always_unused,
+ const char *fmt __always_unused, ...)
{
}
#endif
+#define batadv_dbg(type, bat_priv, arg...) \
+ _batadv_dbg(type, bat_priv, 0, ## arg)
+#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
+ _batadv_dbg(type, bat_priv, 1, ## arg)
+
#define batadv_info(net_dev, fmt, arg...) \
do { \
struct net_device *_netdev = (net_dev); \
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 96b66fd30f96..ab6bb2af1d45 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -20,7 +20,6 @@
#include "originator.h"
#include "hard-interface.h"
#include "translation-table.h"
-#include "multicast.h"
/**
* batadv_mcast_mla_softif_get - get softif multicast listeners
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 35141534938e..35f76f2f7824 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -706,11 +706,11 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
ethhdr->h_dest, vid))
- net_ratelimited_function(batadv_dbg, BATADV_DBG_TT,
- bat_priv,
- "Rerouting unicast packet to %pM (dst=%pM): Local Roaming\n",
- unicast_packet->dest,
- ethhdr->h_dest);
+ batadv_dbg_ratelimited(BATADV_DBG_TT,
+ bat_priv,
+ "Rerouting unicast packet to %pM (dst=%pM): Local Roaming\n",
+ unicast_packet->dest,
+ ethhdr->h_dest);
/* at this point the mesh destination should have been
* substituted with the originator address found in the global
* table. If not, let the packet go untouched anyway because
@@ -752,10 +752,10 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
*/
if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
ethhdr->h_dest, vid)) {
- net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv,
- "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
- unicast_packet->dest, ethhdr->h_dest,
- old_ttvn, curr_ttvn);
+ batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
+ "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
+ unicast_packet->dest, ethhdr->h_dest,
+ old_ttvn, curr_ttvn);
return 1;
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index cbd677f48c00..5467955eb27c 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -751,7 +751,7 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->gw.bandwidth_down, 100);
atomic_set(&bat_priv->gw.bandwidth_up, 20);
atomic_set(&bat_priv->orig_interval, 1000);
- atomic_set(&bat_priv->hop_penalty, 15);
+ atomic_set(&bat_priv->hop_penalty, 30);
#ifdef CONFIG_BATMAN_ADV_DEBUG
atomic_set(&bat_priv->log_level, 0);
#endif
@@ -927,7 +927,7 @@ struct net_device *batadv_softif_create(const char *name)
int ret;
soft_iface = alloc_netdev(sizeof(struct batadv_priv), name,
- batadv_softif_init_early);
+ NET_NAME_UNKNOWN, batadv_softif_init_early);
if (!soft_iface)
return NULL;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index fc47baa888c5..f40cb0436eba 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -900,32 +900,24 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
bat_kobj = &bat_priv->soft_iface->dev.kobj;
- uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) +
- strlen(batadv_uev_type_str[type]) + 1,
- GFP_ATOMIC);
+ uevent_env[0] = kasprintf(GFP_ATOMIC,
+ "%s%s", BATADV_UEV_TYPE_VAR,
+ batadv_uev_type_str[type]);
if (!uevent_env[0])
goto out;
- sprintf(uevent_env[0], "%s%s", BATADV_UEV_TYPE_VAR,
- batadv_uev_type_str[type]);
-
- uevent_env[1] = kmalloc(strlen(BATADV_UEV_ACTION_VAR) +
- strlen(batadv_uev_action_str[action]) + 1,
- GFP_ATOMIC);
+ uevent_env[1] = kasprintf(GFP_ATOMIC,
+ "%s%s", BATADV_UEV_ACTION_VAR,
+ batadv_uev_action_str[action]);
if (!uevent_env[1])
goto out;
- sprintf(uevent_env[1], "%s%s", BATADV_UEV_ACTION_VAR,
- batadv_uev_action_str[action]);
-
/* If the event is DEL, ignore the data field */
if (action != BATADV_UEV_DEL) {
- uevent_env[2] = kmalloc(strlen(BATADV_UEV_DATA_VAR) +
- strlen(data) + 1, GFP_ATOMIC);
+ uevent_env[2] = kasprintf(GFP_ATOMIC,
+ "%s%s", BATADV_UEV_DATA_VAR, data);
if (!uevent_env[2])
goto out;
-
- sprintf(uevent_env[2], "%s%s", BATADV_UEV_DATA_VAR, data);
}
ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 8796ffa08b43..c2e0d14433df 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2013 Intel Corp.
+ Copyright (c) 2013-2014 Intel Corp.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 and
@@ -14,6 +14,8 @@
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -25,16 +27,21 @@
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/l2cap.h>
-#include "6lowpan.h"
-
#include <net/6lowpan.h> /* for the compression support */
+#define VERSION "0.1"
+
+static struct dentry *lowpan_psm_debugfs;
+static struct dentry *lowpan_control_debugfs;
+
#define IFACE_NAME_TEMPLATE "bt%d"
#define EUI64_ADDR_LEN 8
struct skb_cb {
struct in6_addr addr;
- struct l2cap_conn *conn;
+ struct in6_addr gw;
+ struct l2cap_chan *chan;
+ int status;
};
#define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb))
@@ -48,9 +55,19 @@ struct skb_cb {
static LIST_HEAD(bt_6lowpan_devices);
static DEFINE_RWLOCK(devices_lock);
+/* If psm is set to 0 (default value), then 6lowpan is disabled.
+ * Other values are used to indicate a Protocol Service Multiplexer
+ * value for 6lowpan.
+ */
+static u16 psm_6lowpan;
+
+/* We are listening incoming connections via this channel
+ */
+static struct l2cap_chan *listen_chan;
+
struct lowpan_peer {
struct list_head list;
- struct l2cap_conn *conn;
+ struct l2cap_chan *chan;
/* peer addresses in various formats */
unsigned char eui64_addr[EUI64_ADDR_LEN];
@@ -84,6 +101,8 @@ static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer)
{
list_del(&peer->list);
+ module_put(THIS_MODULE);
+
if (atomic_dec_and_test(&dev->peer_count)) {
BT_DBG("last peer");
return true;
@@ -101,13 +120,26 @@ static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev,
ba, type);
list_for_each_entry_safe(peer, tmp, &dev->peers, list) {
- BT_DBG("addr %pMR type %d",
- &peer->conn->hcon->dst, peer->conn->hcon->dst_type);
+ BT_DBG("dst addr %pMR dst type %d",
+ &peer->chan->dst, peer->chan->dst_type);
- if (bacmp(&peer->conn->hcon->dst, ba))
+ if (bacmp(&peer->chan->dst, ba))
continue;
- if (type == peer->conn->hcon->dst_type)
+ if (type == peer->chan->dst_type)
+ return peer;
+ }
+
+ return NULL;
+}
+
+static inline struct lowpan_peer *peer_lookup_chan(struct lowpan_dev *dev,
+ struct l2cap_chan *chan)
+{
+ struct lowpan_peer *peer, *tmp;
+
+ list_for_each_entry_safe(peer, tmp, &dev->peers, list) {
+ if (peer->chan == chan)
return peer;
}
@@ -120,7 +152,55 @@ static inline struct lowpan_peer *peer_lookup_conn(struct lowpan_dev *dev,
struct lowpan_peer *peer, *tmp;
list_for_each_entry_safe(peer, tmp, &dev->peers, list) {
- if (peer->conn == conn)
+ if (peer->chan->conn == conn)
+ return peer;
+ }
+
+ return NULL;
+}
+
+static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev,
+ struct in6_addr *daddr,
+ struct sk_buff *skb)
+{
+ struct lowpan_peer *peer, *tmp;
+ struct in6_addr *nexthop;
+ struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ int count = atomic_read(&dev->peer_count);
+
+ BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt);
+
+ /* If we have multiple 6lowpan peers, then check where we should
+ * send the packet. If only one peer exists, then we can send the
+ * packet right away.
+ */
+ if (count == 1)
+ return list_first_entry(&dev->peers, struct lowpan_peer,
+ list);
+
+ if (!rt) {
+ nexthop = &lowpan_cb(skb)->gw;
+
+ if (ipv6_addr_any(nexthop))
+ return NULL;
+ } else {
+ nexthop = rt6_nexthop(rt);
+
+ /* We need to remember the address because it is needed
+ * by bt_xmit() when sending the packet. In bt_xmit(), the
+ * destination routing info is not set.
+ */
+ memcpy(&lowpan_cb(skb)->gw, nexthop, sizeof(struct in6_addr));
+ }
+
+ BT_DBG("gw %pI6c", nexthop);
+
+ list_for_each_entry_safe(peer, tmp, &dev->peers, list) {
+ BT_DBG("dst addr %pMR dst type %d ip %pI6c",
+ &peer->chan->dst, peer->chan->dst_type,
+ &peer->peer_addr);
+
+ if (!ipv6_addr_cmp(&peer->peer_addr, nexthop))
return peer;
}
@@ -176,16 +256,16 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
return -ENOMEM;
ret = netif_rx(skb_cp);
-
- BT_DBG("receive skb %d", ret);
- if (ret < 0)
+ if (ret < 0) {
+ BT_DBG("receive skb %d", ret);
return NET_RX_DROP;
+ }
return ret;
}
static int process_data(struct sk_buff *skb, struct net_device *netdev,
- struct l2cap_conn *conn)
+ struct l2cap_chan *chan)
{
const u8 *saddr, *daddr;
u8 iphc0, iphc1;
@@ -196,7 +276,7 @@ static int process_data(struct sk_buff *skb, struct net_device *netdev,
dev = lowpan_dev(netdev);
read_lock_irqsave(&devices_lock, flags);
- peer = peer_lookup_conn(dev, conn);
+ peer = peer_lookup_chan(dev, chan);
read_unlock_irqrestore(&devices_lock, flags);
if (!peer)
goto drop;
@@ -225,7 +305,7 @@ drop:
}
static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
- struct l2cap_conn *conn)
+ struct l2cap_chan *chan)
{
struct sk_buff *local_skb;
int ret;
@@ -269,7 +349,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
if (!local_skb)
goto drop;
- ret = process_data(local_skb, dev, conn);
+ ret = process_data(local_skb, dev, chan);
if (ret != NET_RX_SUCCESS)
goto drop;
@@ -286,147 +366,39 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
return NET_RX_SUCCESS;
drop:
+ dev->stats.rx_dropped++;
kfree_skb(skb);
return NET_RX_DROP;
}
/* Packet from BT LE device */
-int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb)
+static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
struct lowpan_dev *dev;
struct lowpan_peer *peer;
int err;
- peer = lookup_peer(conn);
+ peer = lookup_peer(chan->conn);
if (!peer)
return -ENOENT;
- dev = lookup_dev(conn);
+ dev = lookup_dev(chan->conn);
if (!dev || !dev->netdev)
return -ENOENT;
- err = recv_pkt(skb, dev->netdev, conn);
- BT_DBG("recv pkt %d", err);
-
- return err;
-}
-
-static inline int skbuff_copy(void *msg, int len, int count, int mtu,
- struct sk_buff *skb, struct net_device *dev)
-{
- struct sk_buff **frag;
- int sent = 0;
-
- memcpy(skb_put(skb, count), msg, count);
-
- sent += count;
- msg += count;
- len -= count;
-
- dev->stats.tx_bytes += count;
- dev->stats.tx_packets++;
-
- raw_dump_table(__func__, "Sending", skb->data, skb->len);
-
- /* Continuation fragments (no L2CAP header) */
- frag = &skb_shinfo(skb)->frag_list;
- while (len > 0) {
- struct sk_buff *tmp;
-
- count = min_t(unsigned int, mtu, len);
-
- tmp = bt_skb_alloc(count, GFP_ATOMIC);
- if (!tmp)
- return -ENOMEM;
-
- *frag = tmp;
-
- memcpy(skb_put(*frag, count), msg, count);
-
- raw_dump_table(__func__, "Sending fragment",
- (*frag)->data, count);
-
- (*frag)->priority = skb->priority;
-
- sent += count;
- msg += count;
- len -= count;
-
- skb->len += (*frag)->len;
- skb->data_len += (*frag)->len;
-
- frag = &(*frag)->next;
-
- dev->stats.tx_bytes += count;
- dev->stats.tx_packets++;
- }
-
- return sent;
-}
-
-static struct sk_buff *create_pdu(struct l2cap_conn *conn, void *msg,
- size_t len, u32 priority,
- struct net_device *dev)
-{
- struct sk_buff *skb;
- int err, count;
- struct l2cap_hdr *lh;
-
- /* FIXME: This mtu check should be not needed and atm is only used for
- * testing purposes
- */
- if (conn->mtu > (L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE))
- conn->mtu = L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE;
-
- count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len);
-
- BT_DBG("conn %p len %zu mtu %d count %d", conn, len, conn->mtu, count);
-
- skb = bt_skb_alloc(count + L2CAP_HDR_SIZE, GFP_ATOMIC);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- skb->priority = priority;
-
- lh = (struct l2cap_hdr *)skb_put(skb, L2CAP_HDR_SIZE);
- lh->cid = cpu_to_le16(L2CAP_FC_6LOWPAN);
- lh->len = cpu_to_le16(len);
-
- err = skbuff_copy(msg, len, count, conn->mtu, skb, dev);
- if (unlikely(err < 0)) {
- kfree_skb(skb);
- BT_DBG("skbuff copy %d failed", err);
- return ERR_PTR(err);
+ err = recv_pkt(skb, dev->netdev, chan);
+ if (err) {
+ BT_DBG("recv pkt %d", err);
+ err = -EAGAIN;
}
- return skb;
-}
-
-static int conn_send(struct l2cap_conn *conn,
- void *msg, size_t len, u32 priority,
- struct net_device *dev)
-{
- struct sk_buff *skb;
-
- skb = create_pdu(conn, msg, len, priority, dev);
- if (IS_ERR(skb))
- return -EINVAL;
-
- BT_DBG("conn %p skb %p len %d priority %u", conn, skb, skb->len,
- skb->priority);
-
- hci_send_acl(conn->hchan, skb, ACL_START);
-
- return 0;
+ return err;
}
static u8 get_addr_type_from_eui64(u8 byte)
{
- /* Is universal(0) or local(1) bit, */
- if (byte & 0x02)
- return ADDR_LE_DEV_RANDOM;
-
- return ADDR_LE_DEV_PUBLIC;
+ /* Is universal(0) or local(1) bit */
+ return ((byte & 0x02) ? BDADDR_LE_RANDOM : BDADDR_LE_PUBLIC);
}
static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr)
@@ -454,77 +426,132 @@ static void convert_dest_bdaddr(struct in6_addr *ip6_daddr,
*addr_type = get_addr_type_from_eui64(addr->b[5]);
}
-static int header_create(struct sk_buff *skb, struct net_device *netdev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len)
+static int setup_header(struct sk_buff *skb, struct net_device *netdev,
+ bdaddr_t *peer_addr, u8 *peer_addr_type)
{
- struct ipv6hdr *hdr;
+ struct in6_addr ipv6_daddr;
struct lowpan_dev *dev;
struct lowpan_peer *peer;
bdaddr_t addr, *any = BDADDR_ANY;
- u8 *saddr, *daddr = any->b;
- u8 addr_type;
-
- if (type != ETH_P_IPV6)
- return -EINVAL;
-
- hdr = ipv6_hdr(skb);
+ u8 *daddr = any->b;
+ int err, status = 0;
dev = lowpan_dev(netdev);
- if (ipv6_addr_is_multicast(&hdr->daddr)) {
- memcpy(&lowpan_cb(skb)->addr, &hdr->daddr,
- sizeof(struct in6_addr));
- lowpan_cb(skb)->conn = NULL;
+ memcpy(&ipv6_daddr, &lowpan_cb(skb)->addr, sizeof(ipv6_daddr));
+
+ if (ipv6_addr_is_multicast(&ipv6_daddr)) {
+ lowpan_cb(skb)->chan = NULL;
} else {
unsigned long flags;
+ u8 addr_type;
/* Get destination BT device from skb.
* If there is no such peer then discard the packet.
*/
- convert_dest_bdaddr(&hdr->daddr, &addr, &addr_type);
+ convert_dest_bdaddr(&ipv6_daddr, &addr, &addr_type);
- BT_DBG("dest addr %pMR type %s IP %pI6c", &addr,
- addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
- &hdr->daddr);
+ BT_DBG("dest addr %pMR type %d IP %pI6c", &addr,
+ addr_type, &ipv6_daddr);
read_lock_irqsave(&devices_lock, flags);
peer = peer_lookup_ba(dev, &addr, addr_type);
read_unlock_irqrestore(&devices_lock, flags);
if (!peer) {
- BT_DBG("no such peer %pMR found", &addr);
- return -ENOENT;
+ /* The packet might be sent to 6lowpan interface
+ * because of routing (either via default route
+ * or user set route) so get peer according to
+ * the destination address.
+ */
+ read_lock_irqsave(&devices_lock, flags);
+ peer = peer_lookup_dst(dev, &ipv6_daddr, skb);
+ read_unlock_irqrestore(&devices_lock, flags);
+ if (!peer) {
+ BT_DBG("no such peer %pMR found", &addr);
+ return -ENOENT;
+ }
}
daddr = peer->eui64_addr;
+ *peer_addr = addr;
+ *peer_addr_type = addr_type;
+ lowpan_cb(skb)->chan = peer->chan;
- memcpy(&lowpan_cb(skb)->addr, &hdr->daddr,
- sizeof(struct in6_addr));
- lowpan_cb(skb)->conn = peer->conn;
+ status = 1;
}
- saddr = dev->netdev->dev_addr;
+ lowpan_header_compress(skb, netdev, ETH_P_IPV6, daddr,
+ dev->netdev->dev_addr, skb->len);
+
+ err = dev_hard_header(skb, netdev, ETH_P_IPV6, NULL, NULL, 0);
+ if (err < 0)
+ return err;
+
+ return status;
+}
+
+static int header_create(struct sk_buff *skb, struct net_device *netdev,
+ unsigned short type, const void *_daddr,
+ const void *_saddr, unsigned int len)
+{
+ struct ipv6hdr *hdr;
+
+ if (type != ETH_P_IPV6)
+ return -EINVAL;
- return lowpan_header_compress(skb, netdev, type, daddr, saddr, len);
+ hdr = ipv6_hdr(skb);
+
+ memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr));
+
+ return 0;
}
/* Packet to BT LE device */
-static int send_pkt(struct l2cap_conn *conn, const void *saddr,
- const void *daddr, struct sk_buff *skb,
+static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
struct net_device *netdev)
{
- raw_dump_table(__func__, "raw skb data dump before fragmentation",
- skb->data, skb->len);
+ struct msghdr msg;
+ struct kvec iv;
+ int err;
+
+ /* Remember the skb so that we can send EAGAIN to the caller if
+ * we run out of credits.
+ */
+ chan->data = skb;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = (struct iovec *) &iv;
+ msg.msg_iovlen = 1;
+ iv.iov_base = skb->data;
+ iv.iov_len = skb->len;
+
+ err = l2cap_chan_send(chan, &msg, skb->len);
+ if (err > 0) {
+ netdev->stats.tx_bytes += err;
+ netdev->stats.tx_packets++;
+ return 0;
+ }
+
+ if (!err)
+ err = lowpan_cb(skb)->status;
+
+ if (err < 0) {
+ if (err == -EAGAIN)
+ netdev->stats.tx_dropped++;
+ else
+ netdev->stats.tx_errors++;
+ }
- return conn_send(conn, skb->data, skb->len, 0, netdev);
+ return err;
}
-static void send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev)
+static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev)
{
struct sk_buff *local_skb;
struct lowpan_dev *entry, *tmp;
unsigned long flags;
+ int err = 0;
read_lock_irqsave(&devices_lock, flags);
@@ -538,58 +565,77 @@ static void send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev)
dev = lowpan_dev(entry->netdev);
list_for_each_entry_safe(pentry, ptmp, &dev->peers, list) {
+ int ret;
+
local_skb = skb_clone(skb, GFP_ATOMIC);
- send_pkt(pentry->conn, netdev->dev_addr,
- pentry->eui64_addr, local_skb, netdev);
+ BT_DBG("xmit %s to %pMR type %d IP %pI6c chan %p",
+ netdev->name,
+ &pentry->chan->dst, pentry->chan->dst_type,
+ &pentry->peer_addr, pentry->chan);
+ ret = send_pkt(pentry->chan, local_skb, netdev);
+ if (ret < 0)
+ err = ret;
kfree_skb(local_skb);
}
}
read_unlock_irqrestore(&devices_lock, flags);
+
+ return err;
}
static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
{
int err = 0;
- unsigned char *eui64_addr;
- struct lowpan_dev *dev;
- struct lowpan_peer *peer;
bdaddr_t addr;
u8 addr_type;
+ struct sk_buff *tmpskb;
- if (ipv6_addr_is_multicast(&lowpan_cb(skb)->addr)) {
- /* We need to send the packet to every device
- * behind this interface.
- */
- send_mcast_pkt(skb, netdev);
- } else {
- unsigned long flags;
-
- convert_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type);
- eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8;
- dev = lowpan_dev(netdev);
-
- read_lock_irqsave(&devices_lock, flags);
- peer = peer_lookup_ba(dev, &addr, addr_type);
- read_unlock_irqrestore(&devices_lock, flags);
+ /* We must take a copy of the skb before we modify/replace the ipv6
+ * header as the header could be used elsewhere
+ */
+ tmpskb = skb_unshare(skb, GFP_ATOMIC);
+ if (!tmpskb) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+ skb = tmpskb;
- BT_DBG("xmit %s to %pMR type %s IP %pI6c peer %p",
- netdev->name, &addr,
- addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
- &lowpan_cb(skb)->addr, peer);
+ /* Return values from setup_header()
+ * <0 - error, packet is dropped
+ * 0 - this is a multicast packet
+ * 1 - this is unicast packet
+ */
+ err = setup_header(skb, netdev, &addr, &addr_type);
+ if (err < 0) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
- if (peer && peer->conn)
- err = send_pkt(peer->conn, netdev->dev_addr,
- eui64_addr, skb, netdev);
+ if (err) {
+ if (lowpan_cb(skb)->chan) {
+ BT_DBG("xmit %s to %pMR type %d IP %pI6c chan %p",
+ netdev->name, &addr, addr_type,
+ &lowpan_cb(skb)->addr, lowpan_cb(skb)->chan);
+ err = send_pkt(lowpan_cb(skb)->chan, skb, netdev);
+ } else {
+ err = -ENOENT;
+ }
+ } else {
+ /* We need to send the packet to every device behind this
+ * interface.
+ */
+ err = send_mcast_pkt(skb, netdev);
}
+
dev_kfree_skb(skb);
if (err)
BT_DBG("ERROR: xmit failed (%d)", err);
- return (err < 0) ? NET_XMIT_DROP : err;
+ return err < 0 ? NET_XMIT_DROP : err;
}
static const struct net_device_ops netdev_ops = {
@@ -609,7 +655,8 @@ static void netdev_setup(struct net_device *dev)
dev->needed_tailroom = 0;
dev->mtu = IPV6_MIN_MTU;
dev->tx_queue_len = 0;
- dev->flags = IFF_RUNNING | IFF_POINTOPOINT;
+ dev->flags = IFF_RUNNING | IFF_POINTOPOINT |
+ IFF_MULTICAST;
dev->watchdog_timeo = 0;
dev->netdev_ops = &netdev_ops;
@@ -634,7 +681,7 @@ static void set_addr(u8 *eui, u8 *addr, u8 addr_type)
eui[7] = addr[0];
/* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */
- if (addr_type == ADDR_LE_DEV_PUBLIC)
+ if (addr_type == BDADDR_LE_PUBLIC)
eui[0] &= ~0x02;
else
eui[0] |= 0x02;
@@ -660,6 +707,17 @@ static void ifup(struct net_device *netdev)
rtnl_unlock();
}
+static void ifdown(struct net_device *netdev)
+{
+ int err;
+
+ rtnl_lock();
+ err = dev_close(netdev);
+ if (err < 0)
+ BT_INFO("iface %s cannot be closed (%d)", netdev->name, err);
+ rtnl_unlock();
+}
+
static void do_notify_peers(struct work_struct *work)
{
struct lowpan_dev *dev = container_of(work, struct lowpan_dev,
@@ -673,30 +731,81 @@ static bool is_bt_6lowpan(struct hci_conn *hcon)
if (hcon->type != LE_LINK)
return false;
- return test_bit(HCI_CONN_6LOWPAN, &hcon->flags);
+ if (!psm_6lowpan)
+ return false;
+
+ return true;
+}
+
+static struct l2cap_chan *chan_create(void)
+{
+ struct l2cap_chan *chan;
+
+ chan = l2cap_chan_create();
+ if (!chan)
+ return NULL;
+
+ l2cap_chan_set_defaults(chan);
+
+ chan->chan_type = L2CAP_CHAN_CONN_ORIENTED;
+ chan->mode = L2CAP_MODE_LE_FLOWCTL;
+ chan->omtu = 65535;
+ chan->imtu = chan->omtu;
+
+ return chan;
+}
+
+static struct l2cap_chan *chan_open(struct l2cap_chan *pchan)
+{
+ struct l2cap_chan *chan;
+
+ chan = chan_create();
+ if (!chan)
+ return NULL;
+
+ chan->remote_mps = chan->omtu;
+ chan->mps = chan->omtu;
+
+ chan->state = BT_CONNECTED;
+
+ return chan;
}
-static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev)
+static void set_ip_addr_bits(u8 addr_type, u8 *addr)
+{
+ if (addr_type == BDADDR_LE_PUBLIC)
+ *addr |= 0x02;
+ else
+ *addr &= ~0x02;
+}
+
+static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
+ struct lowpan_dev *dev)
{
struct lowpan_peer *peer;
unsigned long flags;
peer = kzalloc(sizeof(*peer), GFP_ATOMIC);
if (!peer)
- return -ENOMEM;
+ return NULL;
- peer->conn = conn;
+ peer->chan = chan;
memset(&peer->peer_addr, 0, sizeof(struct in6_addr));
/* RFC 2464 ch. 5 */
peer->peer_addr.s6_addr[0] = 0xFE;
peer->peer_addr.s6_addr[1] = 0x80;
- set_addr((u8 *)&peer->peer_addr.s6_addr + 8, conn->hcon->dst.b,
- conn->hcon->dst_type);
+ set_addr((u8 *)&peer->peer_addr.s6_addr + 8, chan->dst.b,
+ chan->dst_type);
memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8,
EUI64_ADDR_LEN);
+ /* IPv6 address needs to have the U/L bit set properly so toggle
+ * it back here.
+ */
+ set_ip_addr_bits(chan->dst_type, (u8 *)&peer->peer_addr.s6_addr + 8);
+
write_lock_irqsave(&devices_lock, flags);
INIT_LIST_HEAD(&peer->list);
peer_add(dev, peer);
@@ -706,40 +815,24 @@ static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev)
INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers);
schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100));
- return 0;
+ return peer->chan;
}
-/* This gets called when BT LE 6LoWPAN device is connected. We then
- * create network device that acts as a proxy between BT LE device
- * and kernel network stack.
- */
-int bt_6lowpan_add_conn(struct l2cap_conn *conn)
+static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
{
- struct lowpan_peer *peer = NULL;
- struct lowpan_dev *dev;
struct net_device *netdev;
int err = 0;
unsigned long flags;
- if (!is_bt_6lowpan(conn->hcon))
- return 0;
-
- peer = lookup_peer(conn);
- if (peer)
- return -EEXIST;
-
- dev = lookup_dev(conn);
- if (dev)
- return add_peer_conn(conn, dev);
-
- netdev = alloc_netdev(sizeof(*dev), IFACE_NAME_TEMPLATE, netdev_setup);
+ netdev = alloc_netdev(sizeof(struct lowpan_dev), IFACE_NAME_TEMPLATE,
+ NET_NAME_UNKNOWN, netdev_setup);
if (!netdev)
return -ENOMEM;
- set_dev_addr(netdev, &conn->hcon->src, conn->hcon->src_type);
+ set_dev_addr(netdev, &chan->src, chan->src_type);
netdev->netdev_ops = &netdev_ops;
- SET_NETDEV_DEV(netdev, &conn->hcon->dev);
+ SET_NETDEV_DEV(netdev, &chan->conn->hcon->dev);
SET_NETDEV_DEVTYPE(netdev, &bt_type);
err = register_netdev(netdev);
@@ -749,28 +842,61 @@ int bt_6lowpan_add_conn(struct l2cap_conn *conn)
goto out;
}
- BT_DBG("ifindex %d peer bdaddr %pMR my addr %pMR",
- netdev->ifindex, &conn->hcon->dst, &conn->hcon->src);
+ BT_DBG("ifindex %d peer bdaddr %pMR type %d my addr %pMR type %d",
+ netdev->ifindex, &chan->dst, chan->dst_type,
+ &chan->src, chan->src_type);
set_bit(__LINK_STATE_PRESENT, &netdev->state);
- dev = netdev_priv(netdev);
- dev->netdev = netdev;
- dev->hdev = conn->hcon->hdev;
- INIT_LIST_HEAD(&dev->peers);
+ *dev = netdev_priv(netdev);
+ (*dev)->netdev = netdev;
+ (*dev)->hdev = chan->conn->hcon->hdev;
+ INIT_LIST_HEAD(&(*dev)->peers);
write_lock_irqsave(&devices_lock, flags);
- INIT_LIST_HEAD(&dev->list);
- list_add(&dev->list, &bt_6lowpan_devices);
+ INIT_LIST_HEAD(&(*dev)->list);
+ list_add(&(*dev)->list, &bt_6lowpan_devices);
write_unlock_irqrestore(&devices_lock, flags);
- ifup(netdev);
-
- return add_peer_conn(conn, dev);
+ return 0;
out:
return err;
}
+static inline void chan_ready_cb(struct l2cap_chan *chan)
+{
+ struct lowpan_dev *dev;
+
+ dev = lookup_dev(chan->conn);
+
+ BT_DBG("chan %p conn %p dev %p", chan, chan->conn, dev);
+
+ if (!dev) {
+ if (setup_netdev(chan, &dev) < 0) {
+ l2cap_chan_del(chan, -ENOENT);
+ return;
+ }
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return;
+
+ add_peer_chan(chan, dev);
+ ifup(dev->netdev);
+}
+
+static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan)
+{
+ struct l2cap_chan *chan;
+
+ chan = chan_open(pchan);
+ chan->ops = pchan->ops;
+
+ BT_DBG("chan %p pchan %p", chan, pchan);
+
+ return chan;
+}
+
static void delete_netdev(struct work_struct *work)
{
struct lowpan_dev *entry = container_of(work, struct lowpan_dev,
@@ -781,26 +907,43 @@ static void delete_netdev(struct work_struct *work)
/* The entry pointer is deleted in device_event() */
}
-int bt_6lowpan_del_conn(struct l2cap_conn *conn)
+static void chan_close_cb(struct l2cap_chan *chan)
{
struct lowpan_dev *entry, *tmp;
struct lowpan_dev *dev = NULL;
struct lowpan_peer *peer;
int err = -ENOENT;
unsigned long flags;
- bool last = false;
+ bool last = false, removed = true;
- if (!conn || !is_bt_6lowpan(conn->hcon))
- return 0;
+ BT_DBG("chan %p conn %p", chan, chan->conn);
+
+ if (chan->conn && chan->conn->hcon) {
+ if (!is_bt_6lowpan(chan->conn->hcon))
+ return;
+
+ /* If conn is set, then the netdev is also there and we should
+ * not remove it.
+ */
+ removed = false;
+ }
write_lock_irqsave(&devices_lock, flags);
list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) {
dev = lowpan_dev(entry->netdev);
- peer = peer_lookup_conn(dev, conn);
+ peer = peer_lookup_chan(dev, chan);
if (peer) {
last = peer_del(dev, peer);
err = 0;
+
+ BT_DBG("dev %p removing %speer %p", dev,
+ last ? "last " : "1 ", peer);
+ BT_DBG("chan %p orig refcnt %d", chan,
+ atomic_read(&chan->kref.refcount));
+
+ l2cap_chan_put(chan);
+ kfree(peer);
break;
}
}
@@ -810,18 +953,408 @@ int bt_6lowpan_del_conn(struct l2cap_conn *conn)
cancel_delayed_work_sync(&dev->notify_peers);
- /* bt_6lowpan_del_conn() is called with hci dev lock held which
- * means that we must delete the netdevice in worker thread.
- */
- INIT_WORK(&entry->delete_netdev, delete_netdev);
- schedule_work(&entry->delete_netdev);
+ ifdown(dev->netdev);
+
+ if (!removed) {
+ INIT_WORK(&entry->delete_netdev, delete_netdev);
+ schedule_work(&entry->delete_netdev);
+ }
} else {
write_unlock_irqrestore(&devices_lock, flags);
}
+ return;
+}
+
+static void chan_state_change_cb(struct l2cap_chan *chan, int state, int err)
+{
+ BT_DBG("chan %p conn %p state %s err %d", chan, chan->conn,
+ state_to_string(state), err);
+}
+
+static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan,
+ unsigned long hdr_len,
+ unsigned long len, int nb)
+{
+ /* Note that we must allocate using GFP_ATOMIC here as
+ * this function is called originally from netdev hard xmit
+ * function in atomic context.
+ */
+ return bt_skb_alloc(hdr_len + len, GFP_ATOMIC);
+}
+
+static void chan_suspend_cb(struct l2cap_chan *chan)
+{
+ struct sk_buff *skb = chan->data;
+
+ BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb);
+
+ if (!skb)
+ return;
+
+ lowpan_cb(skb)->status = -EAGAIN;
+}
+
+static void chan_resume_cb(struct l2cap_chan *chan)
+{
+ struct sk_buff *skb = chan->data;
+
+ BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb);
+
+ if (!skb)
+ return;
+
+ lowpan_cb(skb)->status = 0;
+}
+
+static long chan_get_sndtimeo_cb(struct l2cap_chan *chan)
+{
+ return L2CAP_CONN_TIMEOUT;
+}
+
+static const struct l2cap_ops bt_6lowpan_chan_ops = {
+ .name = "L2CAP 6LoWPAN channel",
+ .new_connection = chan_new_conn_cb,
+ .recv = chan_recv_cb,
+ .close = chan_close_cb,
+ .state_change = chan_state_change_cb,
+ .ready = chan_ready_cb,
+ .resume = chan_resume_cb,
+ .suspend = chan_suspend_cb,
+ .get_sndtimeo = chan_get_sndtimeo_cb,
+ .alloc_skb = chan_alloc_skb_cb,
+ .memcpy_fromiovec = l2cap_chan_no_memcpy_fromiovec,
+
+ .teardown = l2cap_chan_no_teardown,
+ .defer = l2cap_chan_no_defer,
+ .set_shutdown = l2cap_chan_no_set_shutdown,
+};
+
+static inline __u8 bdaddr_type(__u8 type)
+{
+ if (type == ADDR_LE_DEV_PUBLIC)
+ return BDADDR_LE_PUBLIC;
+ else
+ return BDADDR_LE_RANDOM;
+}
+
+static struct l2cap_chan *chan_get(void)
+{
+ struct l2cap_chan *pchan;
+
+ pchan = chan_create();
+ if (!pchan)
+ return NULL;
+
+ pchan->ops = &bt_6lowpan_chan_ops;
+
+ return pchan;
+}
+
+static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type)
+{
+ struct l2cap_chan *pchan;
+ int err;
+
+ pchan = chan_get();
+ if (!pchan)
+ return -EINVAL;
+
+ err = l2cap_chan_connect(pchan, cpu_to_le16(psm_6lowpan), 0,
+ addr, dst_type);
+
+ BT_DBG("chan %p err %d", pchan, err);
+ if (err < 0)
+ l2cap_chan_put(pchan);
+
return err;
}
+static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type)
+{
+ struct lowpan_peer *peer;
+
+ BT_DBG("conn %p dst type %d", conn, dst_type);
+
+ peer = lookup_peer(conn);
+ if (!peer)
+ return -ENOENT;
+
+ BT_DBG("peer %p chan %p", peer, peer->chan);
+
+ l2cap_chan_close(peer->chan, ENOENT);
+
+ return 0;
+}
+
+static struct l2cap_chan *bt_6lowpan_listen(void)
+{
+ bdaddr_t *addr = BDADDR_ANY;
+ struct l2cap_chan *pchan;
+ int err;
+
+ if (psm_6lowpan == 0)
+ return NULL;
+
+ pchan = chan_get();
+ if (!pchan)
+ return NULL;
+
+ pchan->state = BT_LISTEN;
+ pchan->src_type = BDADDR_LE_PUBLIC;
+
+ BT_DBG("psm 0x%04x chan %p src type %d", psm_6lowpan, pchan,
+ pchan->src_type);
+
+ err = l2cap_add_psm(pchan, addr, cpu_to_le16(psm_6lowpan));
+ if (err) {
+ l2cap_chan_put(pchan);
+ BT_ERR("psm cannot be added err %d", err);
+ return NULL;
+ }
+
+ return pchan;
+}
+
+static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
+ struct l2cap_conn **conn)
+{
+ struct hci_conn *hcon;
+ struct hci_dev *hdev;
+ bdaddr_t *src = BDADDR_ANY;
+ int n;
+
+ n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu",
+ &addr->b[5], &addr->b[4], &addr->b[3],
+ &addr->b[2], &addr->b[1], &addr->b[0],
+ addr_type);
+
+ if (n < 7)
+ return -EINVAL;
+
+ hdev = hci_get_route(addr, src);
+ if (!hdev)
+ return -ENOENT;
+
+ hci_dev_lock(hdev);
+ hcon = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr);
+ hci_dev_unlock(hdev);
+
+ if (!hcon)
+ return -ENOENT;
+
+ *conn = (struct l2cap_conn *)hcon->l2cap_data;
+
+ BT_DBG("conn %p dst %pMR type %d", *conn, &hcon->dst, hcon->dst_type);
+
+ return 0;
+}
+
+static void disconnect_all_peers(void)
+{
+ struct lowpan_dev *entry, *tmp_dev;
+ struct lowpan_peer *peer, *tmp_peer, *new_peer;
+ struct list_head peers;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&peers);
+
+ /* We make a separate list of peers as the close_cb() will
+ * modify the device peers list so it is better not to mess
+ * with the same list at the same time.
+ */
+
+ read_lock_irqsave(&devices_lock, flags);
+
+ list_for_each_entry_safe(entry, tmp_dev, &bt_6lowpan_devices, list) {
+ list_for_each_entry_safe(peer, tmp_peer, &entry->peers, list) {
+ new_peer = kmalloc(sizeof(*new_peer), GFP_ATOMIC);
+ if (!new_peer)
+ break;
+
+ new_peer->chan = peer->chan;
+ INIT_LIST_HEAD(&new_peer->list);
+
+ list_add(&new_peer->list, &peers);
+ }
+ }
+
+ read_unlock_irqrestore(&devices_lock, flags);
+
+ list_for_each_entry_safe(peer, tmp_peer, &peers, list) {
+ l2cap_chan_close(peer->chan, ENOENT);
+ kfree(peer);
+ }
+}
+
+static int lowpan_psm_set(void *data, u64 val)
+{
+ u16 psm;
+
+ psm = val;
+ if (psm == 0 || psm_6lowpan != psm)
+ /* Disconnect existing connections if 6lowpan is
+ * disabled (psm = 0), or if psm changes.
+ */
+ disconnect_all_peers();
+
+ psm_6lowpan = psm;
+
+ if (listen_chan) {
+ l2cap_chan_close(listen_chan, 0);
+ l2cap_chan_put(listen_chan);
+ }
+
+ listen_chan = bt_6lowpan_listen();
+
+ return 0;
+}
+
+static int lowpan_psm_get(void *data, u64 *val)
+{
+ *val = psm_6lowpan;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(lowpan_psm_fops, lowpan_psm_get,
+ lowpan_psm_set, "%llu\n");
+
+static ssize_t lowpan_control_write(struct file *fp,
+ const char __user *user_buffer,
+ size_t count,
+ loff_t *position)
+{
+ char buf[32];
+ size_t buf_size = min(count, sizeof(buf) - 1);
+ int ret;
+ bdaddr_t addr;
+ u8 addr_type;
+ struct l2cap_conn *conn = NULL;
+
+ if (copy_from_user(buf, user_buffer, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+
+ if (memcmp(buf, "connect ", 8) == 0) {
+ ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn);
+ if (ret == -EINVAL)
+ return ret;
+
+ if (listen_chan) {
+ l2cap_chan_close(listen_chan, 0);
+ l2cap_chan_put(listen_chan);
+ listen_chan = NULL;
+ }
+
+ if (conn) {
+ struct lowpan_peer *peer;
+
+ if (!is_bt_6lowpan(conn->hcon))
+ return -EINVAL;
+
+ peer = lookup_peer(conn);
+ if (peer) {
+ BT_DBG("6LoWPAN connection already exists");
+ return -EALREADY;
+ }
+
+ BT_DBG("conn %p dst %pMR type %d user %d", conn,
+ &conn->hcon->dst, conn->hcon->dst_type,
+ addr_type);
+ }
+
+ ret = bt_6lowpan_connect(&addr, addr_type);
+ if (ret < 0)
+ return ret;
+
+ return count;
+ }
+
+ if (memcmp(buf, "disconnect ", 11) == 0) {
+ ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn);
+ if (ret < 0)
+ return ret;
+
+ ret = bt_6lowpan_disconnect(conn, addr_type);
+ if (ret < 0)
+ return ret;
+
+ return count;
+ }
+
+ return count;
+}
+
+static int lowpan_control_show(struct seq_file *f, void *ptr)
+{
+ struct lowpan_dev *entry, *tmp_dev;
+ struct lowpan_peer *peer, *tmp_peer;
+ unsigned long flags;
+
+ read_lock_irqsave(&devices_lock, flags);
+
+ list_for_each_entry_safe(entry, tmp_dev, &bt_6lowpan_devices, list) {
+ list_for_each_entry_safe(peer, tmp_peer, &entry->peers, list)
+ seq_printf(f, "%pMR (type %u)\n",
+ &peer->chan->dst, peer->chan->dst_type);
+ }
+
+ read_unlock_irqrestore(&devices_lock, flags);
+
+ return 0;
+}
+
+static int lowpan_control_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lowpan_control_show, inode->i_private);
+}
+
+static const struct file_operations lowpan_control_fops = {
+ .open = lowpan_control_open,
+ .read = seq_read,
+ .write = lowpan_control_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void disconnect_devices(void)
+{
+ struct lowpan_dev *entry, *tmp, *new_dev;
+ struct list_head devices;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&devices);
+
+ /* We make a separate list of devices because the unregister_netdev()
+ * will call device_event() which will also want to modify the same
+ * devices list.
+ */
+
+ read_lock_irqsave(&devices_lock, flags);
+
+ list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) {
+ new_dev = kmalloc(sizeof(*new_dev), GFP_ATOMIC);
+ if (!new_dev)
+ break;
+
+ new_dev->netdev = entry->netdev;
+ INIT_LIST_HEAD(&new_dev->list);
+
+ list_add(&new_dev->list, &devices);
+ }
+
+ read_unlock_irqrestore(&devices_lock, flags);
+
+ list_for_each_entry_safe(entry, tmp, &devices, list) {
+ ifdown(entry->netdev);
+ BT_DBG("Unregistering netdev %s %p",
+ entry->netdev->name, entry->netdev);
+ unregister_netdev(entry->netdev);
+ kfree(entry);
+ }
+}
+
static int device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -838,6 +1371,8 @@ static int device_event(struct notifier_block *unused,
list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices,
list) {
if (entry->netdev == netdev) {
+ BT_DBG("Unregistered netdev %s %p",
+ netdev->name, netdev);
list_del(&entry->list);
kfree(entry);
break;
@@ -854,12 +1389,37 @@ static struct notifier_block bt_6lowpan_dev_notifier = {
.notifier_call = device_event,
};
-int bt_6lowpan_init(void)
+static int __init bt_6lowpan_init(void)
{
+ lowpan_psm_debugfs = debugfs_create_file("6lowpan_psm", 0644,
+ bt_debugfs, NULL,
+ &lowpan_psm_fops);
+ lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644,
+ bt_debugfs, NULL,
+ &lowpan_control_fops);
+
return register_netdevice_notifier(&bt_6lowpan_dev_notifier);
}
-void bt_6lowpan_cleanup(void)
+static void __exit bt_6lowpan_exit(void)
{
+ debugfs_remove(lowpan_psm_debugfs);
+ debugfs_remove(lowpan_control_debugfs);
+
+ if (listen_chan) {
+ l2cap_chan_close(listen_chan, 0);
+ l2cap_chan_put(listen_chan);
+ }
+
+ disconnect_devices();
+
unregister_netdevice_notifier(&bt_6lowpan_dev_notifier);
}
+
+module_init(bt_6lowpan_init);
+module_exit(bt_6lowpan_exit);
+
+MODULE_AUTHOR("Jukka Rissanen <jukka.rissanen@linux.intel.com>");
+MODULE_DESCRIPTION("Bluetooth 6LoWPAN");
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/6lowpan.h b/net/bluetooth/6lowpan.h
deleted file mode 100644
index 5d281f1eaf55..000000000000
--- a/net/bluetooth/6lowpan.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- Copyright (c) 2013 Intel Corp.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License version 2 and
- only version 2 as published by the Free Software Foundation.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-*/
-
-#ifndef __6LOWPAN_H
-#define __6LOWPAN_H
-
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/bluetooth/l2cap.h>
-
-#if IS_ENABLED(CONFIG_BT_6LOWPAN)
-int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb);
-int bt_6lowpan_add_conn(struct l2cap_conn *conn);
-int bt_6lowpan_del_conn(struct l2cap_conn *conn);
-int bt_6lowpan_init(void);
-void bt_6lowpan_cleanup(void);
-#else
-static int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb)
-{
- return -EOPNOTSUPP;
-}
-static int bt_6lowpan_add_conn(struct l2cap_conn *conn)
-{
- return -EOPNOTSUPP;
-}
-int bt_6lowpan_del_conn(struct l2cap_conn *conn)
-{
- return -EOPNOTSUPP;
-}
-static int bt_6lowpan_init(void)
-{
- return -EOPNOTSUPP;
-}
-static void bt_6lowpan_cleanup(void) { }
-#endif
-
-#endif /* __6LOWPAN_H */
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 06ec14499ca1..600fb29288f4 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -6,7 +6,6 @@ menuconfig BT
tristate "Bluetooth subsystem support"
depends on NET && !S390
depends on RFKILL || !RFKILL
- select 6LOWPAN_IPHC if BT_6LOWPAN
select CRC16
select CRYPTO
select CRYPTO_BLKCIPHER
@@ -41,10 +40,10 @@ menuconfig BT
more information, see <http://www.bluez.org/>.
config BT_6LOWPAN
- bool "Bluetooth 6LoWPAN support"
- depends on BT && IPV6
+ tristate "Bluetooth 6LoWPAN support"
+ depends on BT && 6LOWPAN
help
- IPv6 compression over Bluetooth.
+ IPv6 compression over Bluetooth Low Energy.
source "net/bluetooth/rfcomm/Kconfig"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index ca51246b1016..886e9aa3ecf1 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -7,10 +7,12 @@ obj-$(CONFIG_BT_RFCOMM) += rfcomm/
obj-$(CONFIG_BT_BNEP) += bnep/
obj-$(CONFIG_BT_CMTP) += cmtp/
obj-$(CONFIG_BT_HIDP) += hidp/
+obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o
+
+bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \
a2mp.o amp.o
-bluetooth-$(CONFIG_BT_6LOWPAN) += 6lowpan.o
subdir-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 9514cc9e850c..5dcade511fdb 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -63,7 +63,7 @@ void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data)
msg.msg_iov = (struct iovec *) &iv;
msg.msg_iovlen = 1;
- l2cap_chan_send(chan, &msg, total_len, 0);
+ l2cap_chan_send(chan, &msg, total_len);
kfree(cmd);
}
@@ -693,18 +693,19 @@ static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state,
}
static struct sk_buff *a2mp_chan_alloc_skb_cb(struct l2cap_chan *chan,
+ unsigned long hdr_len,
unsigned long len, int nb)
{
struct sk_buff *skb;
- skb = bt_skb_alloc(len, GFP_KERNEL);
+ skb = bt_skb_alloc(hdr_len + len, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
return skb;
}
-static struct l2cap_ops a2mp_chan_ops = {
+static const struct l2cap_ops a2mp_chan_ops = {
.name = "L2CAP A2MP channel",
.recv = a2mp_chan_recv_cb,
.close = a2mp_chan_close_cb,
@@ -719,6 +720,7 @@ static struct l2cap_ops a2mp_chan_ops = {
.resume = l2cap_chan_no_resume,
.set_shutdown = l2cap_chan_no_set_shutdown,
.get_sndtimeo = l2cap_chan_no_get_sndtimeo,
+ .memcpy_fromiovec = l2cap_chan_no_memcpy_fromiovec,
};
static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 2021c481cdb6..339c74ad4553 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -639,7 +639,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations bt_seq_ops = {
+static const struct seq_operations bt_seq_ops = {
.start = bt_seq_start,
.next = bt_seq_next,
.stop = bt_seq_stop,
@@ -709,8 +709,11 @@ EXPORT_SYMBOL_GPL(bt_debugfs);
static int __init bt_init(void)
{
+ struct sk_buff *skb;
int err;
+ BUILD_BUG_ON(sizeof(struct bt_skb_cb) > sizeof(skb->cb));
+
BT_INFO("Core ver %s", VERSION);
bt_debugfs = debugfs_create_dir("bluetooth", NULL);
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index bb39509b3f06..2640d78f30b8 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -113,8 +113,9 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
{
bdaddr_t *dst = &mgr->l2cap_conn->hcon->dst;
struct hci_conn *hcon;
+ u8 role = out ? HCI_ROLE_MASTER : HCI_ROLE_SLAVE;
- hcon = hci_conn_add(hdev, AMP_LINK, dst);
+ hcon = hci_conn_add(hdev, AMP_LINK, dst, role);
if (!hcon)
return NULL;
@@ -125,7 +126,6 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
hcon->handle = __next_handle(mgr);
hcon->remote_id = remote_id;
hcon->amp_mgr = amp_mgr_get(mgr);
- hcon->out = out;
return hcon;
}
@@ -133,8 +133,8 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
/* AMP crypto key generation interface */
static int hmac_sha256(u8 *key, u8 ksize, char *plaintext, u8 psize, u8 *output)
{
- int ret = 0;
struct crypto_shash *tfm;
+ int ret;
if (!ksize)
return -EINVAL;
@@ -149,15 +149,14 @@ static int hmac_sha256(u8 *key, u8 ksize, char *plaintext, u8 psize, u8 *output)
if (ret) {
BT_DBG("crypto_ahash_setkey failed: err %d", ret);
} else {
- struct {
- struct shash_desc shash;
- char ctx[crypto_shash_descsize(tfm)];
- } desc;
+ char desc[sizeof(struct shash_desc) +
+ crypto_shash_descsize(tfm)] CRYPTO_MINALIGN_ATTR;
+ struct shash_desc *shash = (struct shash_desc *)desc;
- desc.shash.tfm = tfm;
- desc.shash.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ shash->tfm = tfm;
+ shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = crypto_shash_digest(&desc.shash, plaintext, psize,
+ ret = crypto_shash_digest(shash, plaintext, psize,
output);
}
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index a841d3e776c5..85bcc21e84d2 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -538,8 +538,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
/* session struct allocated as private part of net_device */
dev = alloc_netdev(sizeof(struct bnep_session),
- (*req->device) ? req->device : "bnep%d",
- bnep_net_setup);
+ (*req->device) ? req->device : "bnep%d",
+ NET_NAME_UNKNOWN,
+ bnep_net_setup);
if (!dev)
return -ENOMEM;
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index cd75e4d64b90..1ca8a87a0787 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -362,12 +362,6 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb)
CAPIMSG_SETCONTROL(skb->data, contr);
}
- if (!ctrl) {
- BT_ERR("Can't find controller %d for message", session->num);
- kfree_skb(skb);
- return;
- }
-
capi_ctr_handle_message(ctrl, appl, skb);
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a7a27bc2c0b1..b9517bd17190 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -36,19 +36,25 @@
struct sco_param {
u16 pkt_type;
u16 max_latency;
+ u8 retrans_effort;
+};
+
+static const struct sco_param esco_param_cvsd[] = {
+ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a, 0x01 }, /* S3 */
+ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007, 0x01 }, /* S2 */
+ { EDR_ESCO_MASK | ESCO_EV3, 0x0007, 0x01 }, /* S1 */
+ { EDR_ESCO_MASK | ESCO_HV3, 0xffff, 0x01 }, /* D1 */
+ { EDR_ESCO_MASK | ESCO_HV1, 0xffff, 0x01 }, /* D0 */
};
static const struct sco_param sco_param_cvsd[] = {
- { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a }, /* S3 */
- { EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007 }, /* S2 */
- { EDR_ESCO_MASK | ESCO_EV3, 0x0007 }, /* S1 */
- { EDR_ESCO_MASK | ESCO_HV3, 0xffff }, /* D1 */
- { EDR_ESCO_MASK | ESCO_HV1, 0xffff }, /* D0 */
+ { EDR_ESCO_MASK | ESCO_HV3, 0xffff, 0xff }, /* D1 */
+ { EDR_ESCO_MASK | ESCO_HV1, 0xffff, 0xff }, /* D0 */
};
-static const struct sco_param sco_param_wideband[] = {
- { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000d }, /* T2 */
- { EDR_ESCO_MASK | ESCO_EV3, 0x0008 }, /* T1 */
+static const struct sco_param esco_param_msbc[] = {
+ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000d, 0x02 }, /* T2 */
+ { EDR_ESCO_MASK | ESCO_EV3, 0x0008, 0x02 }, /* T1 */
};
static void hci_le_create_connection_cancel(struct hci_conn *conn)
@@ -66,8 +72,7 @@ static void hci_acl_create_connection(struct hci_conn *conn)
conn->state = BT_CONNECT;
conn->out = true;
-
- conn->link_mode = HCI_LM_MASTER;
+ conn->role = HCI_ROLE_MASTER;
conn->attempt++;
@@ -117,26 +122,39 @@ static void hci_reject_sco(struct hci_conn *conn)
{
struct hci_cp_reject_sync_conn_req cp;
- cp.reason = HCI_ERROR_REMOTE_USER_TERM;
+ cp.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
bacpy(&cp.bdaddr, &conn->dst);
hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp);
}
-void hci_disconnect(struct hci_conn *conn, __u8 reason)
+int hci_disconnect(struct hci_conn *conn, __u8 reason)
{
struct hci_cp_disconnect cp;
BT_DBG("hcon %p", conn);
+ /* When we are master of an established connection and it enters
+ * the disconnect timeout, then go ahead and try to read the
+ * current clock offset. Processing of the result is done
+ * within the event handling and hci_clock_offset_evt function.
+ */
+ if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER) {
+ struct hci_dev *hdev = conn->hdev;
+ struct hci_cp_read_clock_offset cp;
+
+ cp.handle = cpu_to_le16(conn->handle);
+ hci_send_cmd(hdev, HCI_OP_READ_CLOCK_OFFSET, sizeof(cp), &cp);
+ }
+
conn->state = BT_DISCONN;
cp.handle = cpu_to_le16(conn->handle);
cp.reason = reason;
- hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
+ return hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
}
-static void hci_amp_disconn(struct hci_conn *conn, __u8 reason)
+static void hci_amp_disconn(struct hci_conn *conn)
{
struct hci_cp_disconn_phy_link cp;
@@ -145,7 +163,7 @@ static void hci_amp_disconn(struct hci_conn *conn, __u8 reason)
conn->state = BT_DISCONN;
cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
- cp.reason = reason;
+ cp.reason = hci_proto_disconn_ind(conn);
hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK,
sizeof(cp), &cp);
}
@@ -189,21 +207,26 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_TRANSP:
- if (conn->attempt > ARRAY_SIZE(sco_param_wideband))
+ if (conn->attempt > ARRAY_SIZE(esco_param_msbc))
return false;
- cp.retrans_effort = 0x02;
- param = &sco_param_wideband[conn->attempt - 1];
+ param = &esco_param_msbc[conn->attempt - 1];
break;
case SCO_AIRMODE_CVSD:
- if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
- return false;
- cp.retrans_effort = 0x01;
- param = &sco_param_cvsd[conn->attempt - 1];
+ if (lmp_esco_capable(conn->link)) {
+ if (conn->attempt > ARRAY_SIZE(esco_param_cvsd))
+ return false;
+ param = &esco_param_cvsd[conn->attempt - 1];
+ } else {
+ if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
+ return false;
+ param = &sco_param_cvsd[conn->attempt - 1];
+ }
break;
default:
return false;
}
+ cp.retrans_effort = param->retrans_effort;
cp.pkt_type = __cpu_to_le16(param->pkt_type);
cp.max_latency = __cpu_to_le16(param->max_latency);
@@ -213,14 +236,26 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
return true;
}
-void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
- u16 latency, u16 to_multiplier)
+u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
+ u16 to_multiplier)
{
- struct hci_cp_le_conn_update cp;
struct hci_dev *hdev = conn->hdev;
+ struct hci_conn_params *params;
+ struct hci_cp_le_conn_update cp;
- memset(&cp, 0, sizeof(cp));
+ hci_dev_lock(hdev);
+
+ params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
+ if (params) {
+ params->conn_min_interval = min;
+ params->conn_max_interval = max;
+ params->conn_latency = latency;
+ params->supervision_timeout = to_multiplier;
+ }
+
+ hci_dev_unlock(hdev);
+ memset(&cp, 0, sizeof(cp));
cp.handle = cpu_to_le16(conn->handle);
cp.conn_interval_min = cpu_to_le16(min);
cp.conn_interval_max = cpu_to_le16(max);
@@ -230,6 +265,11 @@ void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
cp.max_ce_len = cpu_to_le16(0x0000);
hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp);
+
+ if (params)
+ return 0x01;
+
+ return 0x00;
}
void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
@@ -271,20 +311,6 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status)
}
}
-static void hci_conn_disconnect(struct hci_conn *conn)
-{
- __u8 reason = hci_proto_disconn_ind(conn);
-
- switch (conn->type) {
- case AMP_LINK:
- hci_amp_disconn(conn, reason);
- break;
- default:
- hci_disconnect(conn, reason);
- break;
- }
-}
-
static void hci_conn_timeout(struct work_struct *work)
{
struct hci_conn *conn = container_of(work, struct hci_conn,
@@ -319,7 +345,12 @@ static void hci_conn_timeout(struct work_struct *work)
break;
case BT_CONFIG:
case BT_CONNECTED:
- hci_conn_disconnect(conn);
+ if (conn->type == AMP_LINK) {
+ hci_amp_disconn(conn);
+ } else {
+ __u8 reason = hci_proto_disconn_ind(conn);
+ hci_disconnect(conn, reason);
+ }
break;
default:
conn->state = BT_CLOSED;
@@ -336,9 +367,6 @@ static void hci_conn_idle(struct work_struct *work)
BT_DBG("hcon %p mode %d", conn, conn->mode);
- if (test_bit(HCI_RAW, &hdev->flags))
- return;
-
if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn))
return;
@@ -398,13 +426,14 @@ static void le_conn_timeout(struct work_struct *work)
hci_le_create_connection_cancel(conn);
}
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role)
{
struct hci_conn *conn;
BT_DBG("%s dst %pMR", hdev->name, dst);
- conn = kzalloc(sizeof(struct hci_conn), GFP_KERNEL);
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
return NULL;
@@ -412,6 +441,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
bacpy(&conn->src, &hdev->bdaddr);
conn->hdev = hdev;
conn->type = type;
+ conn->role = role;
conn->mode = HCI_CM_ACTIVE;
conn->state = BT_OPEN;
conn->auth_type = HCI_AT_GENERAL_BONDING;
@@ -424,6 +454,9 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+ if (conn->role == HCI_ROLE_MASTER)
+ conn->out = true;
+
switch (type) {
case ACL_LINK:
conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
@@ -529,7 +562,6 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
list_for_each_entry(d, &hci_dev_list, list) {
if (!test_bit(HCI_UP, &d->flags) ||
- test_bit(HCI_RAW, &d->flags) ||
test_bit(HCI_USER_CHANNEL, &d->dev_flags) ||
d->dev_type != HCI_BREDR)
continue;
@@ -562,6 +594,15 @@ EXPORT_SYMBOL(hci_get_route);
void hci_le_conn_failed(struct hci_conn *conn, u8 status)
{
struct hci_dev *hdev = conn->hdev;
+ struct hci_conn_params *params;
+
+ params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
+ conn->dst_type);
+ if (params && params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+ params->conn = NULL;
+ }
conn->state = BT_CLOSED;
@@ -627,7 +668,8 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
cp.own_address_type = own_addr_type;
cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval);
cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval);
- cp.supervision_timeout = cpu_to_le16(0x002a);
+ cp.conn_latency = cpu_to_le16(conn->le_conn_latency);
+ cp.supervision_timeout = cpu_to_le16(conn->le_supv_timeout);
cp.min_ce_len = cpu_to_le16(0x0000);
cp.max_ce_len = cpu_to_le16(0x0000);
@@ -644,15 +686,12 @@ static void hci_req_directed_advertising(struct hci_request *req,
u8 own_addr_type;
u8 enable;
- enable = 0x00;
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-
- /* Clear the HCI_ADVERTISING bit temporarily so that the
+ /* Clear the HCI_LE_ADV bit temporarily so that the
* hci_update_random_address knows that it's safe to go ahead
* and write a new random address. The flag will be set back on
* as soon as the SET_ADV_ENABLE HCI command completes.
*/
- clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+ clear_bit(HCI_LE_ADV, &hdev->dev_flags);
/* Set require_privacy to false so that the remote device has a
* chance of identifying us.
@@ -676,7 +715,8 @@ static void hci_req_directed_advertising(struct hci_request *req,
}
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
- u8 dst_type, u8 sec_level, u8 auth_type)
+ u8 dst_type, u8 sec_level, u16 conn_timeout,
+ u8 role)
{
struct hci_conn_params *params;
struct hci_conn *conn;
@@ -696,7 +736,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
if (conn) {
conn->pending_sec_level = sec_level;
- conn->auth_type = auth_type;
goto done;
}
@@ -726,32 +765,56 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
dst_type = ADDR_LE_DEV_RANDOM;
}
- conn = hci_conn_add(hdev, LE_LINK, dst);
+ conn = hci_conn_add(hdev, LE_LINK, dst, role);
if (!conn)
return ERR_PTR(-ENOMEM);
conn->dst_type = dst_type;
conn->sec_level = BT_SECURITY_LOW;
conn->pending_sec_level = sec_level;
- conn->auth_type = auth_type;
+ conn->conn_timeout = conn_timeout;
hci_req_init(&req, hdev);
- if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+ /* Disable advertising if we're active. For master role
+ * connections most controllers will refuse to connect if
+ * advertising is enabled, and for slave role connections we
+ * anyway have to disable it in order to start directed
+ * advertising.
+ */
+ if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) {
+ u8 enable = 0x00;
+ hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable),
+ &enable);
+ }
+
+ /* If requested to connect as slave use directed advertising */
+ if (conn->role == HCI_ROLE_SLAVE) {
+ /* If we're active scanning most controllers are unable
+ * to initiate advertising. Simply reject the attempt.
+ */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) &&
+ hdev->le_scan_type == LE_SCAN_ACTIVE) {
+ skb_queue_purge(&req.cmd_q);
+ hci_conn_del(conn);
+ return ERR_PTR(-EBUSY);
+ }
+
hci_req_directed_advertising(&req, conn);
goto create_conn;
}
- conn->out = true;
- conn->link_mode |= HCI_LM_MASTER;
-
params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
if (params) {
conn->le_conn_min_interval = params->conn_min_interval;
conn->le_conn_max_interval = params->conn_max_interval;
+ conn->le_conn_latency = params->conn_latency;
+ conn->le_supv_timeout = params->supervision_timeout;
} else {
conn->le_conn_min_interval = hdev->le_conn_min_interval;
conn->le_conn_max_interval = hdev->le_conn_max_interval;
+ conn->le_conn_latency = hdev->le_conn_latency;
+ conn->le_supv_timeout = hdev->le_supv_timeout;
}
/* If controller is scanning, we stop it since some controllers are
@@ -785,11 +848,11 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_conn *acl;
if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
- return ERR_PTR(-ENOTSUPP);
+ return ERR_PTR(-EOPNOTSUPP);
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
- acl = hci_conn_add(hdev, ACL_LINK, dst);
+ acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER);
if (!acl)
return ERR_PTR(-ENOMEM);
}
@@ -818,7 +881,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
if (!sco) {
- sco = hci_conn_add(hdev, type, dst);
+ sco = hci_conn_add(hdev, type, dst, HCI_ROLE_MASTER);
if (!sco) {
hci_conn_drop(acl);
return ERR_PTR(-ENOMEM);
@@ -865,7 +928,8 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
return 0;
}
- if (hci_conn_ssp_enabled(conn) && !(conn->link_mode & HCI_LM_ENCRYPT))
+ if (hci_conn_ssp_enabled(conn) &&
+ !test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 0;
return 1;
@@ -881,7 +945,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
if (sec_level > conn->sec_level)
conn->pending_sec_level = sec_level;
- else if (conn->link_mode & HCI_LM_AUTH)
+ else if (test_bit(HCI_CONN_AUTH, &conn->flags))
return 1;
/* Make sure we preserve an existing MITM requirement*/
@@ -899,7 +963,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
/* If we're already encrypted set the REAUTH_PEND flag,
* otherwise set the ENCRYPT_PEND.
*/
- if (conn->link_mode & HCI_LM_ENCRYPT)
+ if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
else
set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
@@ -923,7 +987,8 @@ static void hci_conn_encrypt(struct hci_conn *conn)
}
/* Enable security */
-int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
+int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
+ bool initiator)
{
BT_DBG("hcon %p", conn);
@@ -940,7 +1005,7 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
return 1;
/* For other security levels we need the link key. */
- if (!(conn->link_mode & HCI_LM_AUTH))
+ if (!test_bit(HCI_CONN_AUTH, &conn->flags))
goto auth;
/* An authenticated FIPS approved combination key has sufficient
@@ -976,11 +1041,14 @@ auth:
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
return 0;
+ if (initiator)
+ set_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags);
+
if (!hci_conn_auth(conn, sec_level, auth_type))
return 0;
encrypt:
- if (conn->link_mode & HCI_LM_ENCRYPT)
+ if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 1;
hci_conn_encrypt(conn);
@@ -1027,7 +1095,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role)
{
BT_DBG("hcon %p", conn);
- if (!role && conn->link_mode & HCI_LM_MASTER)
+ if (role == conn->role)
return 1;
if (!test_and_set_bit(HCI_CONN_RSWITCH_PEND, &conn->flags)) {
@@ -1048,9 +1116,6 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active)
BT_DBG("hcon %p mode %d", conn, conn->mode);
- if (test_bit(HCI_RAW, &hdev->flags))
- return;
-
if (conn->mode != HCI_CM_SNIFF)
goto timer;
@@ -1101,6 +1166,28 @@ void hci_conn_check_pending(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
+static u32 get_link_mode(struct hci_conn *conn)
+{
+ u32 link_mode = 0;
+
+ if (conn->role == HCI_ROLE_MASTER)
+ link_mode |= HCI_LM_MASTER;
+
+ if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+ link_mode |= HCI_LM_ENCRYPT;
+
+ if (test_bit(HCI_CONN_AUTH, &conn->flags))
+ link_mode |= HCI_LM_AUTH;
+
+ if (test_bit(HCI_CONN_SECURE, &conn->flags))
+ link_mode |= HCI_LM_SECURE;
+
+ if (test_bit(HCI_CONN_FIPS, &conn->flags))
+ link_mode |= HCI_LM_FIPS;
+
+ return link_mode;
+}
+
int hci_get_conn_list(void __user *arg)
{
struct hci_conn *c;
@@ -1136,7 +1223,7 @@ int hci_get_conn_list(void __user *arg)
(ci + n)->type = c->type;
(ci + n)->out = c->out;
(ci + n)->state = c->state;
- (ci + n)->link_mode = c->link_mode;
+ (ci + n)->link_mode = get_link_mode(c);
if (++n >= req.conn_num)
break;
}
@@ -1172,7 +1259,7 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg)
ci.type = conn->type;
ci.out = conn->out;
ci.state = conn->state;
- ci.link_mode = conn->link_mode;
+ ci.link_mode = get_link_mode(conn);
}
hci_dev_unlock(hdev);
@@ -1209,11 +1296,16 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn)
BT_DBG("%s hcon %p", hdev->name, conn);
- chan = kzalloc(sizeof(struct hci_chan), GFP_KERNEL);
+ if (test_bit(HCI_CONN_DROP, &conn->flags)) {
+ BT_DBG("Refusing to create new hci_chan");
+ return NULL;
+ }
+
+ chan = kzalloc(sizeof(*chan), GFP_KERNEL);
if (!chan)
return NULL;
- chan->conn = conn;
+ chan->conn = hci_conn_get(conn);
skb_queue_head_init(&chan->data_q);
chan->state = BT_CONNECTED;
@@ -1233,7 +1325,10 @@ void hci_chan_del(struct hci_chan *chan)
synchronize_rcu();
- hci_conn_drop(conn);
+ /* Prevent new hci_chan's to be created for this hci_conn */
+ set_bit(HCI_CONN_DROP, &conn->flags);
+
+ hci_conn_put(conn);
skb_queue_purge(&chan->data_q);
kfree(chan);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0a43cce9a914..cb05d7f16a34 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -35,6 +35,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/l2cap.h>
+#include <net/bluetooth/mgmt.h>
#include "smp.h"
@@ -53,6 +54,15 @@ DEFINE_RWLOCK(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
+/* ----- HCI requests ----- */
+
+#define HCI_REQ_DONE 0
+#define HCI_REQ_PEND 1
+#define HCI_REQ_CANCELED 2
+
+#define hci_req_lock(d) mutex_lock(&d->req_lock)
+#define hci_req_unlock(d) mutex_unlock(&d->req_lock)
+
/* ---- HCI notifications ---- */
static void hci_notify(struct hci_dev *hdev, int event)
@@ -68,7 +78,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
struct hci_dev *hdev = file->private_data;
char buf[3];
- buf[0] = test_bit(HCI_DUT_MODE, &hdev->dev_flags) ? 'Y': 'N';
+ buf[0] = test_bit(HCI_DUT_MODE, &hdev->dbg_flags) ? 'Y': 'N';
buf[1] = '\n';
buf[2] = '\0';
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -94,7 +104,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
if (strtobool(buf, &enable))
return -EINVAL;
- if (enable == test_bit(HCI_DUT_MODE, &hdev->dev_flags))
+ if (enable == test_bit(HCI_DUT_MODE, &hdev->dbg_flags))
return -EALREADY;
hci_req_lock(hdev);
@@ -115,7 +125,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
if (err < 0)
return err;
- change_bit(HCI_DUT_MODE, &hdev->dev_flags);
+ change_bit(HCI_DUT_MODE, &hdev->dbg_flags);
return count;
}
@@ -190,6 +200,31 @@ static const struct file_operations blacklist_fops = {
.release = single_release,
};
+static int whitelist_show(struct seq_file *f, void *p)
+{
+ struct hci_dev *hdev = f->private;
+ struct bdaddr_list *b;
+
+ hci_dev_lock(hdev);
+ list_for_each_entry(b, &hdev->whitelist, list)
+ seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int whitelist_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, whitelist_show, inode->i_private);
+}
+
+static const struct file_operations whitelist_fops = {
+ .open = whitelist_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int uuids_show(struct seq_file *f, void *p)
{
struct hci_dev *hdev = f->private;
@@ -352,62 +387,13 @@ static int auto_accept_delay_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get,
auto_accept_delay_set, "%llu\n");
-static int ssp_debug_mode_set(void *data, u64 val)
-{
- struct hci_dev *hdev = data;
- struct sk_buff *skb;
- __u8 mode;
- int err;
-
- if (val != 0 && val != 1)
- return -EINVAL;
-
- if (!test_bit(HCI_UP, &hdev->flags))
- return -ENETDOWN;
-
- hci_req_lock(hdev);
- mode = val;
- skb = __hci_cmd_sync(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(mode),
- &mode, HCI_CMD_TIMEOUT);
- hci_req_unlock(hdev);
-
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- err = -bt_to_errno(skb->data[0]);
- kfree_skb(skb);
-
- if (err < 0)
- return err;
-
- hci_dev_lock(hdev);
- hdev->ssp_debug_mode = val;
- hci_dev_unlock(hdev);
-
- return 0;
-}
-
-static int ssp_debug_mode_get(void *data, u64 *val)
-{
- struct hci_dev *hdev = data;
-
- hci_dev_lock(hdev);
- *val = hdev->ssp_debug_mode;
- hci_dev_unlock(hdev);
-
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(ssp_debug_mode_fops, ssp_debug_mode_get,
- ssp_debug_mode_set, "%llu\n");
-
static ssize_t force_sc_support_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
struct hci_dev *hdev = file->private_data;
char buf[3];
- buf[0] = test_bit(HCI_FORCE_SC, &hdev->dev_flags) ? 'Y': 'N';
+ buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 'Y': 'N';
buf[1] = '\n';
buf[2] = '\0';
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -432,10 +418,10 @@ static ssize_t force_sc_support_write(struct file *file,
if (strtobool(buf, &enable))
return -EINVAL;
- if (enable == test_bit(HCI_FORCE_SC, &hdev->dev_flags))
+ if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags))
return -EALREADY;
- change_bit(HCI_FORCE_SC, &hdev->dev_flags);
+ change_bit(HCI_FORCE_SC, &hdev->dbg_flags);
return count;
}
@@ -719,7 +705,7 @@ static ssize_t force_static_address_read(struct file *file,
struct hci_dev *hdev = file->private_data;
char buf[3];
- buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) ? 'Y': 'N';
+ buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N';
buf[1] = '\n';
buf[2] = '\0';
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -744,10 +730,10 @@ static ssize_t force_static_address_write(struct file *file,
if (strtobool(buf, &enable))
return -EINVAL;
- if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags))
+ if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags))
return -EALREADY;
- change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags);
+ change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags);
return count;
}
@@ -900,177 +886,169 @@ static int conn_max_interval_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get,
conn_max_interval_set, "%llu\n");
-static int adv_channel_map_set(void *data, u64 val)
+static int conn_latency_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x01 || val > 0x07)
+ if (val > 0x01f3)
return -EINVAL;
hci_dev_lock(hdev);
- hdev->le_adv_channel_map = val;
+ hdev->le_conn_latency = val;
hci_dev_unlock(hdev);
return 0;
}
-static int adv_channel_map_get(void *data, u64 *val)
+static int conn_latency_get(void *data, u64 *val)
{
struct hci_dev *hdev = data;
hci_dev_lock(hdev);
- *val = hdev->le_adv_channel_map;
+ *val = hdev->le_conn_latency;
hci_dev_unlock(hdev);
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get,
- adv_channel_map_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get,
+ conn_latency_set, "%llu\n");
-static ssize_t lowpan_read(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
+static int supervision_timeout_set(void *data, u64 val)
{
- struct hci_dev *hdev = file->private_data;
- char buf[3];
+ struct hci_dev *hdev = data;
- buf[0] = test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags) ? 'Y' : 'N';
- buf[1] = '\n';
- buf[2] = '\0';
- return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+ if (val < 0x000a || val > 0x0c80)
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->le_supv_timeout = val;
+ hci_dev_unlock(hdev);
+
+ return 0;
}
-static ssize_t lowpan_write(struct file *fp, const char __user *user_buffer,
- size_t count, loff_t *position)
+static int supervision_timeout_get(void *data, u64 *val)
{
- struct hci_dev *hdev = fp->private_data;
- bool enable;
- char buf[32];
- size_t buf_size = min(count, (sizeof(buf)-1));
+ struct hci_dev *hdev = data;
- if (copy_from_user(buf, user_buffer, buf_size))
- return -EFAULT;
+ hci_dev_lock(hdev);
+ *val = hdev->le_supv_timeout;
+ hci_dev_unlock(hdev);
- buf[buf_size] = '\0';
+ return 0;
+}
- if (strtobool(buf, &enable) < 0)
- return -EINVAL;
+DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get,
+ supervision_timeout_set, "%llu\n");
- if (enable == test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags))
- return -EALREADY;
+static int adv_channel_map_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
- change_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags);
+ if (val < 0x01 || val > 0x07)
+ return -EINVAL;
- return count;
-}
+ hci_dev_lock(hdev);
+ hdev->le_adv_channel_map = val;
+ hci_dev_unlock(hdev);
-static const struct file_operations lowpan_debugfs_fops = {
- .open = simple_open,
- .read = lowpan_read,
- .write = lowpan_write,
- .llseek = default_llseek,
-};
+ return 0;
+}
-static int le_auto_conn_show(struct seq_file *sf, void *ptr)
+static int adv_channel_map_get(void *data, u64 *val)
{
- struct hci_dev *hdev = sf->private;
- struct hci_conn_params *p;
+ struct hci_dev *hdev = data;
hci_dev_lock(hdev);
+ *val = hdev->le_adv_channel_map;
+ hci_dev_unlock(hdev);
- list_for_each_entry(p, &hdev->le_conn_params, list) {
- seq_printf(sf, "%pMR %u %u\n", &p->addr, p->addr_type,
- p->auto_connect);
- }
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get,
+ adv_channel_map_set, "%llu\n");
+
+static int adv_min_interval_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
+ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->le_adv_min_interval = val;
hci_dev_unlock(hdev);
return 0;
}
-static int le_auto_conn_open(struct inode *inode, struct file *file)
+static int adv_min_interval_get(void *data, u64 *val)
{
- return single_open(file, le_auto_conn_show, inode->i_private);
+ struct hci_dev *hdev = data;
+
+ hci_dev_lock(hdev);
+ *val = hdev->le_adv_min_interval;
+ hci_dev_unlock(hdev);
+
+ return 0;
}
-static ssize_t le_auto_conn_write(struct file *file, const char __user *data,
- size_t count, loff_t *offset)
+DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get,
+ adv_min_interval_set, "%llu\n");
+
+static int adv_max_interval_set(void *data, u64 val)
{
- struct seq_file *sf = file->private_data;
- struct hci_dev *hdev = sf->private;
- u8 auto_connect = 0;
- bdaddr_t addr;
- u8 addr_type;
- char *buf;
- int err = 0;
- int n;
+ struct hci_dev *hdev = data;
- /* Don't allow partial write */
- if (*offset != 0)
+ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
return -EINVAL;
- if (count < 3)
- return -EINVAL;
+ hci_dev_lock(hdev);
+ hdev->le_adv_max_interval = val;
+ hci_dev_unlock(hdev);
- buf = memdup_user(data, count);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
+ return 0;
+}
- if (memcmp(buf, "add", 3) == 0) {
- n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu %hhu",
- &addr.b[5], &addr.b[4], &addr.b[3], &addr.b[2],
- &addr.b[1], &addr.b[0], &addr_type,
- &auto_connect);
+static int adv_max_interval_get(void *data, u64 *val)
+{
+ struct hci_dev *hdev = data;
- if (n < 7) {
- err = -EINVAL;
- goto done;
- }
+ hci_dev_lock(hdev);
+ *val = hdev->le_adv_max_interval;
+ hci_dev_unlock(hdev);
- hci_dev_lock(hdev);
- err = hci_conn_params_add(hdev, &addr, addr_type, auto_connect,
- hdev->le_conn_min_interval,
- hdev->le_conn_max_interval);
- hci_dev_unlock(hdev);
+ return 0;
+}
- if (err)
- goto done;
- } else if (memcmp(buf, "del", 3) == 0) {
- n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu",
- &addr.b[5], &addr.b[4], &addr.b[3], &addr.b[2],
- &addr.b[1], &addr.b[0], &addr_type);
+DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get,
+ adv_max_interval_set, "%llu\n");
- if (n < 7) {
- err = -EINVAL;
- goto done;
- }
+static int device_list_show(struct seq_file *f, void *ptr)
+{
+ struct hci_dev *hdev = f->private;
+ struct hci_conn_params *p;
- hci_dev_lock(hdev);
- hci_conn_params_del(hdev, &addr, addr_type);
- hci_dev_unlock(hdev);
- } else if (memcmp(buf, "clr", 3) == 0) {
- hci_dev_lock(hdev);
- hci_conn_params_clear(hdev);
- hci_pend_le_conns_clear(hdev);
- hci_update_background_scan(hdev);
- hci_dev_unlock(hdev);
- } else {
- err = -EINVAL;
+ hci_dev_lock(hdev);
+ list_for_each_entry(p, &hdev->le_conn_params, list) {
+ seq_printf(f, "%pMR %u %u\n", &p->addr, p->addr_type,
+ p->auto_connect);
}
+ hci_dev_unlock(hdev);
-done:
- kfree(buf);
+ return 0;
+}
- if (err)
- return err;
- else
- return count;
+static int device_list_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, device_list_show, inode->i_private);
}
-static const struct file_operations le_auto_conn_fops = {
- .open = le_auto_conn_open,
+static const struct file_operations device_list_fops = {
+ .open = device_list_open,
.read = seq_read,
- .write = le_auto_conn_write,
.llseek = seq_lseek,
.release = single_release,
};
@@ -1426,9 +1404,6 @@ static void le_setup(struct hci_request *req)
/* Read LE Supported States */
hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
- /* Read LE Advertising Channel TX Power */
- hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
-
/* Read LE White List Size */
hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
@@ -1503,14 +1478,17 @@ static void hci_setup_event_mask(struct hci_request *req)
/* Use a different default for LE-only devices */
memset(events, 0, sizeof(events));
events[0] |= 0x10; /* Disconnection Complete */
- events[0] |= 0x80; /* Encryption Change */
events[1] |= 0x08; /* Read Remote Version Information Complete */
events[1] |= 0x20; /* Command Complete */
events[1] |= 0x40; /* Command Status */
events[1] |= 0x80; /* Hardware Error */
events[2] |= 0x04; /* Number of Completed Packets */
events[3] |= 0x02; /* Data Buffer Overflow */
- events[5] |= 0x80; /* Encryption Key Refresh Complete */
+
+ if (hdev->le_features[0] & HCI_LE_ENCRYPTION) {
+ events[0] |= 0x80; /* Encryption Change */
+ events[5] |= 0x80; /* Encryption Key Refresh Complete */
+ }
}
if (lmp_inq_rssi_capable(hdev))
@@ -1549,13 +1527,6 @@ static void hci_setup_event_mask(struct hci_request *req)
events[7] |= 0x20; /* LE Meta-Event */
hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
-
- if (lmp_le_capable(hdev)) {
- memset(events, 0, sizeof(events));
- events[0] = 0x1f;
- hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK,
- sizeof(events), events);
- }
}
static void hci_init2_req(struct hci_request *req, unsigned long opt)
@@ -1570,8 +1541,6 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt)
if (lmp_le_capable(hdev))
le_setup(req);
- hci_setup_event_mask(req);
-
/* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read
* local supported commands HCI command.
*/
@@ -1654,7 +1623,7 @@ static void hci_set_le_support(struct hci_request *req)
if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
cp.le = 0x01;
- cp.simul = lmp_le_br_capable(hdev);
+ cp.simul = 0x00;
}
if (cp.le != lmp_host_le_capable(hdev))
@@ -1688,7 +1657,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
}
/* Enable Authenticated Payload Timeout Expired event if supported */
- if (lmp_ping_capable(hdev))
+ if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING)
events[2] |= 0x80;
hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events);
@@ -1699,6 +1668,8 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
struct hci_dev *hdev = req->hdev;
u8 p;
+ hci_setup_event_mask(req);
+
/* Some Broadcom based Bluetooth controllers do not support the
* Delete Stored Link Key command. They are clearly indicating its
* absence in the bit mask of supported commands.
@@ -1725,8 +1696,33 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
if (hdev->commands[5] & 0x10)
hci_setup_link_policy(req);
- if (lmp_le_capable(hdev))
+ if (lmp_le_capable(hdev)) {
+ u8 events[8];
+
+ memset(events, 0, sizeof(events));
+ events[0] = 0x0f;
+
+ if (hdev->le_features[0] & HCI_LE_ENCRYPTION)
+ events[0] |= 0x10; /* LE Long Term Key Request */
+
+ /* If controller supports the Connection Parameters Request
+ * Link Layer Procedure, enable the corresponding event.
+ */
+ if (hdev->le_features[0] & HCI_LE_CONN_PARAM_REQ_PROC)
+ events[0] |= 0x20; /* LE Remote Connection
+ * Parameter Request
+ */
+
+ hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events),
+ events);
+
+ if (hdev->commands[25] & 0x40) {
+ /* Read LE Advertising Channel TX Power */
+ hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
+ }
+
hci_set_le_support(req);
+ }
/* Read features beyond page 1 if available */
for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) {
@@ -1746,13 +1742,21 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt)
if (hdev->commands[22] & 0x04)
hci_set_event_mask_page_2(req);
+ /* Read local codec list if the HCI command is supported */
+ if (hdev->commands[29] & 0x20)
+ hci_req_add(req, HCI_OP_READ_LOCAL_CODECS, 0, NULL);
+
+ /* Get MWS transport configuration if the HCI command is supported */
+ if (hdev->commands[30] & 0x08)
+ hci_req_add(req, HCI_OP_GET_MWS_TRANSPORT_CONFIG, 0, NULL);
+
/* Check for Synchronization Train support */
if (lmp_sync_train_capable(hdev))
hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL);
/* Enable Secure Connections if supported and configured */
if ((lmp_sc_capable(hdev) ||
- test_bit(HCI_FORCE_SC, &hdev->dev_flags)) &&
+ test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) &&
test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) {
u8 support = 0x01;
hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT,
@@ -1809,6 +1813,8 @@ static int __hci_init(struct hci_dev *hdev)
debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev);
debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev,
&blacklist_fops);
+ debugfs_create_file("whitelist", 0444, hdev->debugfs, hdev,
+ &whitelist_fops);
debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev,
@@ -1830,8 +1836,6 @@ static int __hci_init(struct hci_dev *hdev)
if (lmp_ssp_capable(hdev)) {
debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs,
hdev, &auto_accept_delay_fops);
- debugfs_create_file("ssp_debug_mode", 0644, hdev->debugfs,
- hdev, &ssp_debug_mode_fops);
debugfs_create_file("force_sc_support", 0644, hdev->debugfs,
hdev, &force_sc_support_fops);
debugfs_create_file("sc_only_mode", 0444, hdev->debugfs,
@@ -1879,20 +1883,60 @@ static int __hci_init(struct hci_dev *hdev)
hdev, &conn_min_interval_fops);
debugfs_create_file("conn_max_interval", 0644, hdev->debugfs,
hdev, &conn_max_interval_fops);
+ debugfs_create_file("conn_latency", 0644, hdev->debugfs,
+ hdev, &conn_latency_fops);
+ debugfs_create_file("supervision_timeout", 0644, hdev->debugfs,
+ hdev, &supervision_timeout_fops);
debugfs_create_file("adv_channel_map", 0644, hdev->debugfs,
hdev, &adv_channel_map_fops);
- debugfs_create_file("6lowpan", 0644, hdev->debugfs, hdev,
- &lowpan_debugfs_fops);
- debugfs_create_file("le_auto_conn", 0644, hdev->debugfs, hdev,
- &le_auto_conn_fops);
+ debugfs_create_file("adv_min_interval", 0644, hdev->debugfs,
+ hdev, &adv_min_interval_fops);
+ debugfs_create_file("adv_max_interval", 0644, hdev->debugfs,
+ hdev, &adv_max_interval_fops);
+ debugfs_create_file("device_list", 0444, hdev->debugfs, hdev,
+ &device_list_fops);
debugfs_create_u16("discov_interleaved_timeout", 0644,
hdev->debugfs,
&hdev->discov_interleaved_timeout);
+
+ smp_register(hdev);
}
return 0;
}
+static void hci_init0_req(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ BT_DBG("%s %ld", hdev->name, opt);
+
+ /* Reset */
+ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks))
+ hci_reset_req(req, 0);
+
+ /* Read Local Version */
+ hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
+
+ /* Read BD Address */
+ if (hdev->set_bdaddr)
+ hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);
+}
+
+static int __hci_unconf_init(struct hci_dev *hdev)
+{
+ int err;
+
+ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ return 0;
+
+ err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
static void hci_scan_req(struct hci_request *req, unsigned long opt)
{
__u8 scan = opt;
@@ -1973,16 +2017,20 @@ bool hci_discovery_active(struct hci_dev *hdev)
void hci_discovery_set_state(struct hci_dev *hdev, int state)
{
+ int old_state = hdev->discovery.state;
+
BT_DBG("%s state %u -> %u", hdev->name, hdev->discovery.state, state);
- if (hdev->discovery.state == state)
+ if (old_state == state)
return;
+ hdev->discovery.state = state;
+
switch (state) {
case DISCOVERY_STOPPED:
hci_update_background_scan(hdev);
- if (hdev->discovery.state != DISCOVERY_STARTING)
+ if (old_state != DISCOVERY_STARTING)
mgmt_discovering(hdev, 0);
break;
case DISCOVERY_STARTING:
@@ -1995,8 +2043,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
case DISCOVERY_STOPPING:
break;
}
-
- hdev->discovery.state = state;
}
void hci_inquiry_cache_flush(struct hci_dev *hdev)
@@ -2083,22 +2129,24 @@ void hci_inquiry_cache_update_resolve(struct hci_dev *hdev,
list_add(&ie->list, pos);
}
-bool hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data,
- bool name_known, bool *ssp)
+u32 hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data,
+ bool name_known)
{
struct discovery_state *cache = &hdev->discovery;
struct inquiry_entry *ie;
+ u32 flags = 0;
BT_DBG("cache %p, %pMR", cache, &data->bdaddr);
hci_remove_remote_oob_data(hdev, &data->bdaddr);
- *ssp = data->ssp_mode;
+ if (!data->ssp_mode)
+ flags |= MGMT_DEV_FOUND_LEGACY_PAIRING;
ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr);
if (ie) {
- if (ie->data.ssp_mode)
- *ssp = true;
+ if (!ie->data.ssp_mode)
+ flags |= MGMT_DEV_FOUND_LEGACY_PAIRING;
if (ie->name_state == NAME_NEEDED &&
data->rssi != ie->data.rssi) {
@@ -2110,9 +2158,11 @@ bool hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data,
}
/* Entry not in the cache. Add new one. */
- ie = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC);
- if (!ie)
- return false;
+ ie = kzalloc(sizeof(*ie), GFP_KERNEL);
+ if (!ie) {
+ flags |= MGMT_DEV_FOUND_CONFIRM_NAME;
+ goto done;
+ }
list_add(&ie->all, &cache->all);
@@ -2135,9 +2185,10 @@ update:
cache->timestamp = jiffies;
if (ie->name_state == NAME_NOT_KNOWN)
- return false;
+ flags |= MGMT_DEV_FOUND_CONFIRM_NAME;
- return true;
+done:
+ return flags;
}
static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
@@ -2186,12 +2237,6 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt)
hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
}
-static int wait_inquiry(void *word)
-{
- schedule();
- return signal_pending(current);
-}
-
int hci_inquiry(void __user *arg)
{
__u8 __user *ptr = arg;
@@ -2213,6 +2258,11 @@ int hci_inquiry(void __user *arg)
goto done;
}
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
if (hdev->dev_type != HCI_BREDR) {
err = -EOPNOTSUPP;
goto done;
@@ -2242,7 +2292,7 @@ int hci_inquiry(void __user *arg)
/* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is
* cleared). If it is interrupted by a signal, return -EINTR.
*/
- if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry,
+ if (wait_on_bit(&hdev->flags, HCI_INQUIRY,
TASK_INTERRUPTIBLE))
return -EINTR;
}
@@ -2295,7 +2345,8 @@ static int hci_dev_do_open(struct hci_dev *hdev)
goto done;
}
- if (!test_bit(HCI_SETUP, &hdev->dev_flags)) {
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
+ !test_bit(HCI_CONFIG, &hdev->dev_flags)) {
/* Check for rfkill but allow the HCI setup stage to
* proceed (which in itself doesn't cause any RF activity).
*/
@@ -2338,14 +2389,47 @@ static int hci_dev_do_open(struct hci_dev *hdev)
atomic_set(&hdev->cmd_cnt, 1);
set_bit(HCI_INIT, &hdev->flags);
- if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags))
- ret = hdev->setup(hdev);
+ if (test_bit(HCI_SETUP, &hdev->dev_flags)) {
+ if (hdev->setup)
+ ret = hdev->setup(hdev);
- if (!ret) {
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
- set_bit(HCI_RAW, &hdev->flags);
+ /* The transport driver can set these quirks before
+ * creating the HCI device or in its setup callback.
+ *
+ * In case any of them is set, the controller has to
+ * start up as unconfigured.
+ */
+ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
+ test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks))
+ set_bit(HCI_UNCONFIGURED, &hdev->dev_flags);
- if (!test_bit(HCI_RAW, &hdev->flags) &&
+ /* For an unconfigured controller it is required to
+ * read at least the version information provided by
+ * the Read Local Version Information command.
+ *
+ * If the set_bdaddr driver callback is provided, then
+ * also the original Bluetooth public device address
+ * will be read using the Read BD Address command.
+ */
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+ ret = __hci_unconf_init(hdev);
+ }
+
+ if (test_bit(HCI_CONFIG, &hdev->dev_flags)) {
+ /* If public address change is configured, ensure that
+ * the address gets programmed. If the driver does not
+ * support changing the public address, fail the power
+ * on procedure.
+ */
+ if (bacmp(&hdev->public_addr, BDADDR_ANY) &&
+ hdev->set_bdaddr)
+ ret = hdev->set_bdaddr(hdev, &hdev->public_addr);
+ else
+ ret = -EADDRNOTAVAIL;
+ }
+
+ if (!ret) {
+ if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags))
ret = __hci_init(hdev);
}
@@ -2358,6 +2442,8 @@ static int hci_dev_do_open(struct hci_dev *hdev)
set_bit(HCI_UP, &hdev->flags);
hci_notify(hdev, HCI_DEV_UP);
if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
+ !test_bit(HCI_CONFIG, &hdev->dev_flags) &&
+ !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) &&
hdev->dev_type == HCI_BREDR) {
hci_dev_lock(hdev);
@@ -2382,7 +2468,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
}
hdev->close(hdev);
- hdev->flags = 0;
+ hdev->flags &= BIT(HCI_RAW);
}
done:
@@ -2401,6 +2487,21 @@ int hci_dev_open(__u16 dev)
if (!hdev)
return -ENODEV;
+ /* Devices that are marked as unconfigured can only be powered
+ * up as user channel. Trying to bring them up as normal devices
+ * will result into a failure. Only user channel operation is
+ * possible.
+ *
+ * When this function is called for a user channel, the flag
+ * HCI_USER_CHANNEL will be set first before attempting to
+ * open the device.
+ */
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
+ !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
/* We need to ensure that no other power on/off work is pending
* before proceeding to call hci_dev_do_open. This is
* particularly important if the setup procedure has not yet
@@ -2415,13 +2516,40 @@ int hci_dev_open(__u16 dev)
*/
flush_workqueue(hdev->req_workqueue);
+ /* For controllers not using the management interface and that
+ * are brought up using legacy ioctl, set the HCI_BONDABLE bit
+ * so that pairing works for them. Once the management interface
+ * is in use this bit will be cleared again and userspace has
+ * to explicitly enable it.
+ */
+ if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) &&
+ !test_bit(HCI_MGMT, &hdev->dev_flags))
+ set_bit(HCI_BONDABLE, &hdev->dev_flags);
+
err = hci_dev_do_open(hdev);
+done:
hci_dev_put(hdev);
-
return err;
}
+/* This function requires the caller holds hdev->lock */
+static void hci_pend_le_actions_clear(struct hci_dev *hdev)
+{
+ struct hci_conn_params *p;
+
+ list_for_each_entry(p, &hdev->le_conn_params, list) {
+ if (p->conn) {
+ hci_conn_drop(p->conn);
+ hci_conn_put(p->conn);
+ p->conn = NULL;
+ }
+ list_del_init(&p->action);
+ }
+
+ BT_DBG("All LE pending actions cleared");
+}
+
static int hci_dev_do_close(struct hci_dev *hdev)
{
BT_DBG("%s %p", hdev->name, hdev);
@@ -2432,7 +2560,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hci_req_lock(hdev);
if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
- del_timer_sync(&hdev->cmd_timer);
+ cancel_delayed_work_sync(&hdev->cmd_timer);
hci_req_unlock(hdev);
return 0;
}
@@ -2458,8 +2586,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hci_dev_lock(hdev);
hci_inquiry_cache_flush(hdev);
+ hci_pend_le_actions_clear(hdev);
hci_conn_hash_flush(hdev);
- hci_pend_le_conns_clear(hdev);
hci_dev_unlock(hdev);
hci_notify(hdev, HCI_DEV_DOWN);
@@ -2470,8 +2598,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
/* Reset device */
skb_queue_purge(&hdev->cmd_q);
atomic_set(&hdev->cmd_cnt, 1);
- if (!test_bit(HCI_RAW, &hdev->flags) &&
- !test_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
+ if (!test_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
+ !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
set_bit(HCI_INIT, &hdev->flags);
__hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
@@ -2488,7 +2616,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
/* Drop last sent command */
if (hdev->sent_cmd) {
- del_timer_sync(&hdev->cmd_timer);
+ cancel_delayed_work_sync(&hdev->cmd_timer);
kfree_skb(hdev->sent_cmd);
hdev->sent_cmd = NULL;
}
@@ -2501,7 +2629,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hdev->close(hdev);
/* Clear flags */
- hdev->flags = 0;
+ hdev->flags &= BIT(HCI_RAW);
hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
@@ -2570,6 +2698,11 @@ int hci_dev_reset(__u16 dev)
goto done;
}
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+ ret = -EOPNOTSUPP;
+ goto done;
+ }
+
/* Drop queues */
skb_queue_purge(&hdev->rx_q);
skb_queue_purge(&hdev->cmd_q);
@@ -2585,8 +2718,7 @@ int hci_dev_reset(__u16 dev)
atomic_set(&hdev->cmd_cnt, 1);
hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
- if (!test_bit(HCI_RAW, &hdev->flags))
- ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
+ ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
done:
hci_req_unlock(hdev);
@@ -2608,6 +2740,11 @@ int hci_dev_reset_stat(__u16 dev)
goto done;
}
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+ ret = -EOPNOTSUPP;
+ goto done;
+ }
+
memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
done:
@@ -2615,6 +2752,42 @@ done:
return ret;
}
+static void hci_update_scan_state(struct hci_dev *hdev, u8 scan)
+{
+ bool conn_changed, discov_changed;
+
+ BT_DBG("%s scan 0x%02x", hdev->name, scan);
+
+ if ((scan & SCAN_PAGE))
+ conn_changed = !test_and_set_bit(HCI_CONNECTABLE,
+ &hdev->dev_flags);
+ else
+ conn_changed = test_and_clear_bit(HCI_CONNECTABLE,
+ &hdev->dev_flags);
+
+ if ((scan & SCAN_INQUIRY)) {
+ discov_changed = !test_and_set_bit(HCI_DISCOVERABLE,
+ &hdev->dev_flags);
+ } else {
+ clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
+ discov_changed = test_and_clear_bit(HCI_DISCOVERABLE,
+ &hdev->dev_flags);
+ }
+
+ if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+ return;
+
+ if (conn_changed || discov_changed) {
+ /* In case this was disabled through mgmt */
+ set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
+
+ if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+ mgmt_update_adv_data(hdev);
+
+ mgmt_new_settings(hdev);
+ }
+}
+
int hci_dev_cmd(unsigned int cmd, void __user *arg)
{
struct hci_dev *hdev;
@@ -2633,6 +2806,11 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
goto done;
}
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
if (hdev->dev_type != HCI_BREDR) {
err = -EOPNOTSUPP;
goto done;
@@ -2670,6 +2848,12 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
case HCISETSCAN:
err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
HCI_INIT_TIMEOUT);
+
+ /* Ensure that the connectable and discoverable states
+ * get correctly modified as this was a non-mgmt change.
+ */
+ if (!err)
+ hci_update_scan_state(hdev, dr.dev_opt);
break;
case HCISETLINKPOL:
@@ -2730,14 +2914,17 @@ int hci_get_dev_list(void __user *arg)
read_lock(&hci_dev_list_lock);
list_for_each_entry(hdev, &hci_dev_list, list) {
- if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags))
- cancel_delayed_work(&hdev->power_off);
+ unsigned long flags = hdev->flags;
- if (!test_bit(HCI_MGMT, &hdev->dev_flags))
- set_bit(HCI_PAIRABLE, &hdev->dev_flags);
+ /* When the auto-off is configured it means the transport
+ * is running, but in that case still indicate that the
+ * device is actually down.
+ */
+ if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+ flags &= ~BIT(HCI_UP);
(dr + n)->dev_id = hdev->id;
- (dr + n)->dev_opt = hdev->flags;
+ (dr + n)->dev_opt = flags;
if (++n >= dev_num)
break;
@@ -2757,6 +2944,7 @@ int hci_get_dev_info(void __user *arg)
{
struct hci_dev *hdev;
struct hci_dev_info di;
+ unsigned long flags;
int err = 0;
if (copy_from_user(&di, arg, sizeof(di)))
@@ -2766,16 +2954,19 @@ int hci_get_dev_info(void __user *arg)
if (!hdev)
return -ENODEV;
- if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags))
- cancel_delayed_work_sync(&hdev->power_off);
-
- if (!test_bit(HCI_MGMT, &hdev->dev_flags))
- set_bit(HCI_PAIRABLE, &hdev->dev_flags);
+ /* When the auto-off is configured it means the transport
+ * is running, but in that case still indicate that the
+ * device is actually down.
+ */
+ if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+ flags = hdev->flags & ~BIT(HCI_UP);
+ else
+ flags = hdev->flags;
strcpy(di.name, hdev->name);
di.bdaddr = hdev->bdaddr;
di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4);
- di.flags = hdev->flags;
+ di.flags = flags;
di.pkt_type = hdev->pkt_type;
if (lmp_bredr_capable(hdev)) {
di.acl_mtu = hdev->acl_mtu;
@@ -2815,7 +3006,8 @@ static int hci_rfkill_set_block(void *data, bool blocked)
if (blocked) {
set_bit(HCI_RFKILLED, &hdev->dev_flags);
- if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
+ !test_bit(HCI_CONFIG, &hdev->dev_flags))
hci_dev_do_close(hdev);
} else {
clear_bit(HCI_RFKILLED, &hdev->dev_flags);
@@ -2846,6 +3038,7 @@ static void hci_power_on(struct work_struct *work)
* valid, it is important to turn the device back off.
*/
if (test_bit(HCI_RFKILLED, &hdev->dev_flags) ||
+ test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) ||
(hdev->dev_type == HCI_BREDR &&
!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY))) {
@@ -2856,8 +3049,34 @@ static void hci_power_on(struct work_struct *work)
HCI_AUTO_OFF_TIMEOUT);
}
- if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
+ if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) {
+ /* For unconfigured devices, set the HCI_RAW flag
+ * so that userspace can easily identify them.
+ */
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+ set_bit(HCI_RAW, &hdev->flags);
+
+ /* For fully configured devices, this will send
+ * the Index Added event. For unconfigured devices,
+ * it will send Unconfigued Index Added event.
+ *
+ * Devices with HCI_QUIRK_RAW_DEVICE are ignored
+ * and no event will be send.
+ */
mgmt_index_added(hdev);
+ } else if (test_and_clear_bit(HCI_CONFIG, &hdev->dev_flags)) {
+ /* When the controller is now configured, then it
+ * is important to clear the HCI_RAW flag.
+ */
+ if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+ clear_bit(HCI_RAW, &hdev->flags);
+
+ /* Powering on the controller with HCI_CONFIG set only
+ * happens with the transition from unconfigured to
+ * configured. This will send the Index Added event.
+ */
+ mgmt_index_added(hdev);
+ }
}
static void hci_power_off(struct work_struct *work)
@@ -2972,16 +3191,16 @@ static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn,
return false;
}
-static bool ltk_type_master(u8 type)
+static u8 ltk_role(u8 type)
{
- if (type == HCI_SMP_STK || type == HCI_SMP_LTK)
- return true;
+ if (type == SMP_LTK)
+ return HCI_ROLE_MASTER;
- return false;
+ return HCI_ROLE_SLAVE;
}
struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, __le64 rand,
- bool master)
+ u8 role)
{
struct smp_ltk *k;
@@ -2989,7 +3208,7 @@ struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, __le64 rand,
if (k->ediv != ediv || k->rand != rand)
continue;
- if (ltk_type_master(k->type) != master)
+ if (ltk_role(k->type) != role)
continue;
return k;
@@ -2999,14 +3218,14 @@ struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, __le64 rand,
}
struct smp_ltk *hci_find_ltk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr,
- u8 addr_type, bool master)
+ u8 addr_type, u8 role)
{
struct smp_ltk *k;
list_for_each_entry(k, &hdev->long_term_keys, list)
if (addr_type == k->bdaddr_type &&
bacmp(bdaddr, &k->bdaddr) == 0 &&
- ltk_type_master(k->type) == master)
+ ltk_role(k->type) == role)
return k;
return NULL;
@@ -3022,7 +3241,7 @@ struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t *rpa)
}
list_for_each_entry(irk, &hdev->identity_resolving_keys, list) {
- if (smp_irk_matches(hdev->tfm_aes, irk->val, rpa)) {
+ if (smp_irk_matches(hdev, irk->val, rpa)) {
bacpy(&irk->rpa, rpa);
return irk;
}
@@ -3049,12 +3268,12 @@ struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr,
return NULL;
}
-int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
- bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len)
+struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn,
+ bdaddr_t *bdaddr, u8 *val, u8 type,
+ u8 pin_len, bool *persistent)
{
struct link_key *key, *old_key;
u8 old_key_type;
- bool persistent;
old_key = hci_find_link_key(hdev, bdaddr);
if (old_key) {
@@ -3064,7 +3283,7 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
old_key_type = conn ? conn->key_type : 0xff;
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key)
- return -ENOMEM;
+ return NULL;
list_add(&key->list, &hdev->link_keys);
}
@@ -3089,17 +3308,11 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
else
key->type = type;
- if (!new_key)
- return 0;
-
- persistent = hci_persistent_key(hdev, conn, type, old_key_type);
-
- mgmt_new_link_key(hdev, key, persistent);
+ if (persistent)
+ *persistent = hci_persistent_key(hdev, conn, type,
+ old_key_type);
- if (conn)
- conn->flush_key = !persistent;
-
- return 0;
+ return key;
}
struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr,
@@ -3107,9 +3320,9 @@ struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr,
u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand)
{
struct smp_ltk *key, *old_key;
- bool master = ltk_type_master(type);
+ u8 role = ltk_role(type);
- old_key = hci_find_ltk_by_addr(hdev, bdaddr, addr_type, master);
+ old_key = hci_find_ltk_by_addr(hdev, bdaddr, addr_type, role);
if (old_key)
key = old_key;
else {
@@ -3205,9 +3418,10 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
}
/* HCI command timer function */
-static void hci_cmd_timeout(unsigned long arg)
+static void hci_cmd_timeout(struct work_struct *work)
{
- struct hci_dev *hdev = (void *) arg;
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ cmd_timer.work);
if (hdev->sent_cmd) {
struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
@@ -3313,12 +3527,12 @@ int hci_add_remote_oob_ext_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
return 0;
}
-struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev,
+struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
bdaddr_t *bdaddr, u8 type)
{
struct bdaddr_list *b;
- list_for_each_entry(b, &hdev->blacklist, list) {
+ list_for_each_entry(b, bdaddr_list, list) {
if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
return b;
}
@@ -3326,11 +3540,11 @@ struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev,
return NULL;
}
-static void hci_blacklist_clear(struct hci_dev *hdev)
+void hci_bdaddr_list_clear(struct list_head *bdaddr_list)
{
struct list_head *p, *n;
- list_for_each_safe(p, n, &hdev->blacklist) {
+ list_for_each_safe(p, n, bdaddr_list) {
struct bdaddr_list *b = list_entry(p, struct bdaddr_list, list);
list_del(p);
@@ -3338,99 +3552,38 @@ static void hci_blacklist_clear(struct hci_dev *hdev)
}
}
-int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
+int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type)
{
struct bdaddr_list *entry;
if (!bacmp(bdaddr, BDADDR_ANY))
return -EBADF;
- if (hci_blacklist_lookup(hdev, bdaddr, type))
+ if (hci_bdaddr_list_lookup(list, bdaddr, type))
return -EEXIST;
- entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
bacpy(&entry->bdaddr, bdaddr);
entry->bdaddr_type = type;
- list_add(&entry->list, &hdev->blacklist);
+ list_add(&entry->list, list);
- return mgmt_device_blocked(hdev, bdaddr, type);
+ return 0;
}
-int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
+int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type)
{
struct bdaddr_list *entry;
if (!bacmp(bdaddr, BDADDR_ANY)) {
- hci_blacklist_clear(hdev);
+ hci_bdaddr_list_clear(list);
return 0;
}
- entry = hci_blacklist_lookup(hdev, bdaddr, type);
- if (!entry)
- return -ENOENT;
-
- list_del(&entry->list);
- kfree(entry);
-
- return mgmt_device_unblocked(hdev, bdaddr, type);
-}
-
-struct bdaddr_list *hci_white_list_lookup(struct hci_dev *hdev,
- bdaddr_t *bdaddr, u8 type)
-{
- struct bdaddr_list *b;
-
- list_for_each_entry(b, &hdev->le_white_list, list) {
- if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
- return b;
- }
-
- return NULL;
-}
-
-void hci_white_list_clear(struct hci_dev *hdev)
-{
- struct list_head *p, *n;
-
- list_for_each_safe(p, n, &hdev->le_white_list) {
- struct bdaddr_list *b = list_entry(p, struct bdaddr_list, list);
-
- list_del(p);
- kfree(b);
- }
-}
-
-int hci_white_list_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
-{
- struct bdaddr_list *entry;
-
- if (!bacmp(bdaddr, BDADDR_ANY))
- return -EBADF;
-
- entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- bacpy(&entry->bdaddr, bdaddr);
- entry->bdaddr_type = type;
-
- list_add(&entry->list, &hdev->le_white_list);
-
- return 0;
-}
-
-int hci_white_list_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
-{
- struct bdaddr_list *entry;
-
- if (!bacmp(bdaddr, BDADDR_ANY))
- return -EBADF;
-
- entry = hci_white_list_lookup(hdev, bdaddr, type);
+ entry = hci_bdaddr_list_lookup(list, bdaddr, type);
if (!entry)
return -ENOENT;
@@ -3446,6 +3599,10 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
{
struct hci_conn_params *params;
+ /* The conn params list only contains identity addresses */
+ if (!hci_is_identity_address(addr, addr_type))
+ return NULL;
+
list_for_each_entry(params, &hdev->le_conn_params, list) {
if (bacmp(&params->addr, addr) == 0 &&
params->addr_type == addr_type) {
@@ -3473,66 +3630,114 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
return true;
}
-static bool is_identity_address(bdaddr_t *addr, u8 addr_type)
+/* This function requires the caller holds hdev->lock */
+struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
+ bdaddr_t *addr, u8 addr_type)
{
- if (addr_type == ADDR_LE_DEV_PUBLIC)
- return true;
+ struct hci_conn_params *param;
- /* Check for Random Static address type */
- if ((addr->b[5] & 0xc0) == 0xc0)
- return true;
+ /* The list only contains identity addresses */
+ if (!hci_is_identity_address(addr, addr_type))
+ return NULL;
- return false;
+ list_for_each_entry(param, list, action) {
+ if (bacmp(&param->addr, addr) == 0 &&
+ param->addr_type == addr_type)
+ return param;
+ }
+
+ return NULL;
}
/* This function requires the caller holds hdev->lock */
-int hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type,
- u8 auto_connect, u16 conn_min_interval,
- u16 conn_max_interval)
+struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
+ bdaddr_t *addr, u8 addr_type)
{
struct hci_conn_params *params;
- if (!is_identity_address(addr, addr_type))
- return -EINVAL;
+ if (!hci_is_identity_address(addr, addr_type))
+ return NULL;
params = hci_conn_params_lookup(hdev, addr, addr_type);
if (params)
- goto update;
+ return params;
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params) {
BT_ERR("Out of memory");
- return -ENOMEM;
+ return NULL;
}
bacpy(&params->addr, addr);
params->addr_type = addr_type;
list_add(&params->list, &hdev->le_conn_params);
+ INIT_LIST_HEAD(&params->action);
-update:
- params->conn_min_interval = conn_min_interval;
- params->conn_max_interval = conn_max_interval;
- params->auto_connect = auto_connect;
+ params->conn_min_interval = hdev->le_conn_min_interval;
+ params->conn_max_interval = hdev->le_conn_max_interval;
+ params->conn_latency = hdev->le_conn_latency;
+ params->supervision_timeout = hdev->le_supv_timeout;
+ params->auto_connect = HCI_AUTO_CONN_DISABLED;
+
+ BT_DBG("addr %pMR (type %u)", addr, addr_type);
+
+ return params;
+}
+
+/* This function requires the caller holds hdev->lock */
+int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type,
+ u8 auto_connect)
+{
+ struct hci_conn_params *params;
+
+ params = hci_conn_params_add(hdev, addr, addr_type);
+ if (!params)
+ return -EIO;
+
+ if (params->auto_connect == auto_connect)
+ return 0;
+
+ list_del_init(&params->action);
switch (auto_connect) {
case HCI_AUTO_CONN_DISABLED:
case HCI_AUTO_CONN_LINK_LOSS:
- hci_pend_le_conn_del(hdev, addr, addr_type);
+ hci_update_background_scan(hdev);
break;
+ case HCI_AUTO_CONN_REPORT:
+ list_add(&params->action, &hdev->pend_le_reports);
+ hci_update_background_scan(hdev);
+ break;
+ case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- if (!is_connected(hdev, addr, addr_type))
- hci_pend_le_conn_add(hdev, addr, addr_type);
+ if (!is_connected(hdev, addr, addr_type)) {
+ list_add(&params->action, &hdev->pend_le_conns);
+ hci_update_background_scan(hdev);
+ }
break;
}
- BT_DBG("addr %pMR (type %u) auto_connect %u conn_min_interval 0x%.4x "
- "conn_max_interval 0x%.4x", addr, addr_type, auto_connect,
- conn_min_interval, conn_max_interval);
+ params->auto_connect = auto_connect;
+
+ BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type,
+ auto_connect);
return 0;
}
+static void hci_conn_params_free(struct hci_conn_params *params)
+{
+ if (params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+ }
+
+ list_del(&params->action);
+ list_del(&params->list);
+ kfree(params);
+}
+
/* This function requires the caller holds hdev->lock */
void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
{
@@ -3542,97 +3747,39 @@ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
if (!params)
return;
- hci_pend_le_conn_del(hdev, addr, addr_type);
+ hci_conn_params_free(params);
- list_del(&params->list);
- kfree(params);
+ hci_update_background_scan(hdev);
BT_DBG("addr %pMR (type %u)", addr, addr_type);
}
/* This function requires the caller holds hdev->lock */
-void hci_conn_params_clear(struct hci_dev *hdev)
+void hci_conn_params_clear_disabled(struct hci_dev *hdev)
{
struct hci_conn_params *params, *tmp;
list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) {
+ if (params->auto_connect != HCI_AUTO_CONN_DISABLED)
+ continue;
list_del(&params->list);
kfree(params);
}
- BT_DBG("All LE connection parameters were removed");
+ BT_DBG("All LE disabled connection parameters were removed");
}
/* This function requires the caller holds hdev->lock */
-struct bdaddr_list *hci_pend_le_conn_lookup(struct hci_dev *hdev,
- bdaddr_t *addr, u8 addr_type)
+void hci_conn_params_clear_all(struct hci_dev *hdev)
{
- struct bdaddr_list *entry;
-
- list_for_each_entry(entry, &hdev->pend_le_conns, list) {
- if (bacmp(&entry->bdaddr, addr) == 0 &&
- entry->bdaddr_type == addr_type)
- return entry;
- }
-
- return NULL;
-}
-
-/* This function requires the caller holds hdev->lock */
-void hci_pend_le_conn_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
-{
- struct bdaddr_list *entry;
-
- entry = hci_pend_le_conn_lookup(hdev, addr, addr_type);
- if (entry)
- goto done;
-
- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry) {
- BT_ERR("Out of memory");
- return;
- }
-
- bacpy(&entry->bdaddr, addr);
- entry->bdaddr_type = addr_type;
-
- list_add(&entry->list, &hdev->pend_le_conns);
-
- BT_DBG("addr %pMR (type %u)", addr, addr_type);
-
-done:
- hci_update_background_scan(hdev);
-}
-
-/* This function requires the caller holds hdev->lock */
-void hci_pend_le_conn_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
-{
- struct bdaddr_list *entry;
-
- entry = hci_pend_le_conn_lookup(hdev, addr, addr_type);
- if (!entry)
- goto done;
-
- list_del(&entry->list);
- kfree(entry);
+ struct hci_conn_params *params, *tmp;
- BT_DBG("addr %pMR (type %u)", addr, addr_type);
+ list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list)
+ hci_conn_params_free(params);
-done:
hci_update_background_scan(hdev);
-}
-
-/* This function requires the caller holds hdev->lock */
-void hci_pend_le_conns_clear(struct hci_dev *hdev)
-{
- struct bdaddr_list *entry, *tmp;
-
- list_for_each_entry_safe(entry, tmp, &hdev->pend_le_conns, list) {
- list_del(&entry->list);
- kfree(entry);
- }
- BT_DBG("All LE pending connections cleared");
+ BT_DBG("All LE connection parameters were removed");
}
static void inquiry_complete(struct hci_dev *hdev, u8 status)
@@ -3722,9 +3869,10 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
* In this kind of scenario skip the update and let the random
* address be updated at the next cycle.
*/
- if (test_bit(HCI_ADVERTISING, &hdev->dev_flags) ||
+ if (test_bit(HCI_LE_ADV, &hdev->dev_flags) ||
hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) {
BT_DBG("Deferring random address update");
+ set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
return;
}
@@ -3750,7 +3898,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
!bacmp(&hdev->random_addr, &hdev->rpa))
return 0;
- err = smp_generate_rpa(hdev->tfm_aes, hdev->irk, &hdev->rpa);
+ err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
if (err < 0) {
BT_ERR("%s failed to generate new RPA", hdev->name);
return err;
@@ -3784,7 +3932,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
* the HCI command if the current random address is already the
* static one.
*/
- if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) ||
+ if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
!bacmp(&hdev->bdaddr, BDADDR_ANY)) {
*own_addr_type = ADDR_LE_DEV_RANDOM;
if (bacmp(&hdev->static_addr, &hdev->random_addr))
@@ -3813,7 +3961,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr,
u8 *bdaddr_type)
{
- if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) ||
+ if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
!bacmp(&hdev->bdaddr, BDADDR_ANY)) {
bacpy(bdaddr, &hdev->static_addr);
*bdaddr_type = ADDR_LE_DEV_RANDOM;
@@ -3828,7 +3976,7 @@ struct hci_dev *hci_alloc_dev(void)
{
struct hci_dev *hdev;
- hdev = kzalloc(sizeof(struct hci_dev), GFP_KERNEL);
+ hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
if (!hdev)
return NULL;
@@ -3837,6 +3985,7 @@ struct hci_dev *hci_alloc_dev(void)
hdev->link_mode = (HCI_LM_ACCEPT);
hdev->num_iac = 0x01; /* One IAC support is mandatory */
hdev->io_capability = 0x03; /* No Input No Output */
+ hdev->manufacturer = 0xffff; /* Default to internal use */
hdev->inq_tx_power = HCI_TX_POWER_INVALID;
hdev->adv_tx_power = HCI_TX_POWER_INVALID;
@@ -3844,10 +3993,14 @@ struct hci_dev *hci_alloc_dev(void)
hdev->sniff_min_interval = 80;
hdev->le_adv_channel_map = 0x07;
+ hdev->le_adv_min_interval = 0x0800;
+ hdev->le_adv_max_interval = 0x0800;
hdev->le_scan_interval = 0x0060;
hdev->le_scan_window = 0x0030;
hdev->le_conn_min_interval = 0x0028;
hdev->le_conn_max_interval = 0x0038;
+ hdev->le_conn_latency = 0x0000;
+ hdev->le_supv_timeout = 0x002a;
hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
@@ -3859,6 +4012,7 @@ struct hci_dev *hci_alloc_dev(void)
INIT_LIST_HEAD(&hdev->mgmt_pending);
INIT_LIST_HEAD(&hdev->blacklist);
+ INIT_LIST_HEAD(&hdev->whitelist);
INIT_LIST_HEAD(&hdev->uuids);
INIT_LIST_HEAD(&hdev->link_keys);
INIT_LIST_HEAD(&hdev->long_term_keys);
@@ -3867,6 +4021,7 @@ struct hci_dev *hci_alloc_dev(void)
INIT_LIST_HEAD(&hdev->le_white_list);
INIT_LIST_HEAD(&hdev->le_conn_params);
INIT_LIST_HEAD(&hdev->pend_le_conns);
+ INIT_LIST_HEAD(&hdev->pend_le_reports);
INIT_LIST_HEAD(&hdev->conn_hash.list);
INIT_WORK(&hdev->rx_work, hci_rx_work);
@@ -3884,7 +4039,7 @@ struct hci_dev *hci_alloc_dev(void)
init_waitqueue_head(&hdev->req_wait_q);
- setup_timer(&hdev->cmd_timer, hci_cmd_timeout, (unsigned long) hdev);
+ INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
hci_init_sysfs(hdev);
discovery_init(hdev);
@@ -3906,7 +4061,7 @@ int hci_register_dev(struct hci_dev *hdev)
{
int id, error;
- if (!hdev->open || !hdev->close)
+ if (!hdev->open || !hdev->close || !hdev->send)
return -EINVAL;
/* Do not allow HCI_AMP devices to register at index 0,
@@ -3951,18 +4106,9 @@ int hci_register_dev(struct hci_dev *hdev)
dev_set_name(&hdev->dev, "%s", hdev->name);
- hdev->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0,
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(hdev->tfm_aes)) {
- BT_ERR("Unable to create crypto context");
- error = PTR_ERR(hdev->tfm_aes);
- hdev->tfm_aes = NULL;
- goto err_wqueue;
- }
-
error = device_add(&hdev->dev);
if (error < 0)
- goto err_tfm;
+ goto err_wqueue;
hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev,
RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops,
@@ -3991,6 +4137,12 @@ int hci_register_dev(struct hci_dev *hdev)
list_add(&hdev->list, &hci_dev_list);
write_unlock(&hci_dev_list_lock);
+ /* Devices that are marked for raw-only usage are unconfigured
+ * and should not be included in normal operation.
+ */
+ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ set_bit(HCI_UNCONFIGURED, &hdev->dev_flags);
+
hci_notify(hdev, HCI_DEV_REG);
hci_dev_hold(hdev);
@@ -3998,8 +4150,6 @@ int hci_register_dev(struct hci_dev *hdev)
return id;
-err_tfm:
- crypto_free_blkcipher(hdev->tfm_aes);
err_wqueue:
destroy_workqueue(hdev->workqueue);
destroy_workqueue(hdev->req_workqueue);
@@ -4033,7 +4183,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
cancel_work_sync(&hdev->power_on);
if (!test_bit(HCI_INIT, &hdev->flags) &&
- !test_bit(HCI_SETUP, &hdev->dev_flags)) {
+ !test_bit(HCI_SETUP, &hdev->dev_flags) &&
+ !test_bit(HCI_CONFIG, &hdev->dev_flags)) {
hci_dev_lock(hdev);
mgmt_index_removed(hdev);
hci_dev_unlock(hdev);
@@ -4050,8 +4201,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
rfkill_destroy(hdev->rfkill);
}
- if (hdev->tfm_aes)
- crypto_free_blkcipher(hdev->tfm_aes);
+ smp_unregister(hdev);
device_del(&hdev->dev);
@@ -4061,15 +4211,15 @@ void hci_unregister_dev(struct hci_dev *hdev)
destroy_workqueue(hdev->req_workqueue);
hci_dev_lock(hdev);
- hci_blacklist_clear(hdev);
+ hci_bdaddr_list_clear(&hdev->blacklist);
+ hci_bdaddr_list_clear(&hdev->whitelist);
hci_uuids_clear(hdev);
hci_link_keys_clear(hdev);
hci_smp_ltks_clear(hdev);
hci_smp_irks_clear(hdev);
hci_remote_oob_data_clear(hdev);
- hci_white_list_clear(hdev);
- hci_conn_params_clear(hdev);
- hci_pend_le_conns_clear(hdev);
+ hci_bdaddr_list_clear(&hdev->le_white_list);
+ hci_conn_params_clear_all(hdev);
hci_dev_unlock(hdev);
hci_dev_put(hdev);
@@ -4224,26 +4374,6 @@ static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
return remain;
}
-int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
-{
- int rem = 0;
-
- if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT)
- return -EILSEQ;
-
- while (count) {
- rem = hci_reassembly(hdev, type, data, count, type - 1);
- if (rem < 0)
- return rem;
-
- data += (count - rem);
- count = rem;
- }
-
- return rem;
-}
-EXPORT_SYMBOL(hci_recv_fragment);
-
#define STREAM_REASSEMBLY 0
int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
@@ -4307,6 +4437,8 @@ EXPORT_SYMBOL(hci_unregister_cb);
static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
{
+ int err;
+
BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len);
/* Time stamp */
@@ -4323,8 +4455,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
/* Get rid of skb owner, prior to sending to the driver. */
skb_orphan(skb);
- if (hdev->send(hdev, skb) < 0)
- BT_ERR("%s sending frame failed", hdev->name);
+ err = hdev->send(hdev, skb);
+ if (err < 0) {
+ BT_ERR("%s sending frame failed (%d)", hdev->name, err);
+ kfree_skb(skb);
+ }
}
void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
@@ -4366,6 +4501,11 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
return 0;
}
+bool hci_req_pending(struct hci_dev *hdev)
+{
+ return (hdev->req_status == HCI_REQ_PEND);
+}
+
static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode,
u32 plen, const void *param)
{
@@ -4387,6 +4527,7 @@ static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode,
BT_DBG("skb len %d", skb->len);
bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
+ bt_cb(skb)->opcode = opcode;
return skb;
}
@@ -4798,7 +4939,7 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb)
static void __check_timeout(struct hci_dev *hdev, unsigned int cnt)
{
- if (!test_bit(HCI_RAW, &hdev->flags)) {
+ if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
/* ACL tx timeout must be longer than maximum
* link supervision timeout (40.9 seconds) */
if (!cnt && time_after(jiffies, hdev->acl_last_tx +
@@ -4981,7 +5122,7 @@ static void hci_sched_le(struct hci_dev *hdev)
if (!hci_conn_num(hdev, LE_LINK))
return;
- if (!test_bit(HCI_RAW, &hdev->flags)) {
+ if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
/* LE tx timeout must be longer than maximum
* link supervision timeout (40.9 seconds) */
if (!hdev->le_cnt && hdev->le_pkts &&
@@ -5226,8 +5367,7 @@ static void hci_rx_work(struct work_struct *work)
hci_send_to_sock(hdev, skb);
}
- if (test_bit(HCI_RAW, &hdev->flags) ||
- test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+ if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
kfree_skb(skb);
continue;
}
@@ -5287,10 +5427,10 @@ static void hci_cmd_work(struct work_struct *work)
atomic_dec(&hdev->cmd_cnt);
hci_send_frame(hdev, skb);
if (test_bit(HCI_RESET, &hdev->flags))
- del_timer(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->cmd_timer);
else
- mod_timer(&hdev->cmd_timer,
- jiffies + HCI_CMD_TIMEOUT);
+ schedule_delayed_work(&hdev->cmd_timer,
+ HCI_CMD_TIMEOUT);
} else {
skb_queue_head(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -5307,26 +5447,135 @@ void hci_req_add_le_scan_disable(struct hci_request *req)
hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
}
+static void add_to_white_list(struct hci_request *req,
+ struct hci_conn_params *params)
+{
+ struct hci_cp_le_add_to_white_list cp;
+
+ cp.bdaddr_type = params->addr_type;
+ bacpy(&cp.bdaddr, &params->addr);
+
+ hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp);
+}
+
+static u8 update_white_list(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_conn_params *params;
+ struct bdaddr_list *b;
+ uint8_t white_list_entries = 0;
+
+ /* Go through the current white list programmed into the
+ * controller one by one and check if that address is still
+ * in the list of pending connections or list of devices to
+ * report. If not present in either list, then queue the
+ * command to remove it from the controller.
+ */
+ list_for_each_entry(b, &hdev->le_white_list, list) {
+ struct hci_cp_le_del_from_white_list cp;
+
+ if (hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &b->bdaddr, b->bdaddr_type) ||
+ hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ &b->bdaddr, b->bdaddr_type)) {
+ white_list_entries++;
+ continue;
+ }
+
+ cp.bdaddr_type = b->bdaddr_type;
+ bacpy(&cp.bdaddr, &b->bdaddr);
+
+ hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
+ sizeof(cp), &cp);
+ }
+
+ /* Since all no longer valid white list entries have been
+ * removed, walk through the list of pending connections
+ * and ensure that any new device gets programmed into
+ * the controller.
+ *
+ * If the list of the devices is larger than the list of
+ * available white list entries in the controller, then
+ * just abort and return filer policy value to not use the
+ * white list.
+ */
+ list_for_each_entry(params, &hdev->pend_le_conns, action) {
+ if (hci_bdaddr_list_lookup(&hdev->le_white_list,
+ &params->addr, params->addr_type))
+ continue;
+
+ if (white_list_entries >= hdev->le_white_list_size) {
+ /* Select filter policy to accept all advertising */
+ return 0x00;
+ }
+
+ if (hci_find_irk_by_addr(hdev, &params->addr,
+ params->addr_type)) {
+ /* White list can not be used with RPAs */
+ return 0x00;
+ }
+
+ white_list_entries++;
+ add_to_white_list(req, params);
+ }
+
+ /* After adding all new pending connections, walk through
+ * the list of pending reports and also add these to the
+ * white list if there is still space.
+ */
+ list_for_each_entry(params, &hdev->pend_le_reports, action) {
+ if (hci_bdaddr_list_lookup(&hdev->le_white_list,
+ &params->addr, params->addr_type))
+ continue;
+
+ if (white_list_entries >= hdev->le_white_list_size) {
+ /* Select filter policy to accept all advertising */
+ return 0x00;
+ }
+
+ if (hci_find_irk_by_addr(hdev, &params->addr,
+ params->addr_type)) {
+ /* White list can not be used with RPAs */
+ return 0x00;
+ }
+
+ white_list_entries++;
+ add_to_white_list(req, params);
+ }
+
+ /* Select filter policy to use white list */
+ return 0x01;
+}
+
void hci_req_add_le_passive_scan(struct hci_request *req)
{
struct hci_cp_le_set_scan_param param_cp;
struct hci_cp_le_set_scan_enable enable_cp;
struct hci_dev *hdev = req->hdev;
u8 own_addr_type;
+ u8 filter_policy;
- /* Set require_privacy to true to avoid identification from
- * unknown peer devices. Since this is passive scanning, no
- * SCAN_REQ using the local identity should be sent. Mandating
- * privacy is just an extra precaution.
+ /* Set require_privacy to false since no SCAN_REQ are send
+ * during passive scanning. Not using an unresolvable address
+ * here is important so that peer devices using direct
+ * advertising with our address will be correctly reported
+ * by the controller.
*/
- if (hci_update_random_address(req, true, &own_addr_type))
+ if (hci_update_random_address(req, false, &own_addr_type))
return;
+ /* Adding or removing entries from the white list must
+ * happen before enabling scanning. The controller does
+ * not allow white list modification while scanning.
+ */
+ filter_policy = update_white_list(req);
+
memset(&param_cp, 0, sizeof(param_cp));
param_cp.type = LE_SCAN_PASSIVE;
param_cp.interval = cpu_to_le16(hdev->le_scan_interval);
param_cp.window = cpu_to_le16(hdev->le_scan_window);
param_cp.own_address_type = own_addr_type;
+ param_cp.filter_policy = filter_policy;
hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
&param_cp);
@@ -5356,11 +5605,29 @@ void hci_update_background_scan(struct hci_dev *hdev)
struct hci_conn *conn;
int err;
+ if (!test_bit(HCI_UP, &hdev->flags) ||
+ test_bit(HCI_INIT, &hdev->flags) ||
+ test_bit(HCI_SETUP, &hdev->dev_flags) ||
+ test_bit(HCI_CONFIG, &hdev->dev_flags) ||
+ test_bit(HCI_AUTO_OFF, &hdev->dev_flags) ||
+ test_bit(HCI_UNREGISTER, &hdev->dev_flags))
+ return;
+
+ /* No point in doing scanning if LE support hasn't been enabled */
+ if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+ return;
+
+ /* If discovery is active don't interfere with it */
+ if (hdev->discovery.state != DISCOVERY_STOPPED)
+ return;
+
hci_req_init(&req, hdev);
- if (list_empty(&hdev->pend_le_conns)) {
- /* If there is no pending LE connections, we should stop
- * the background scanning.
+ if (list_empty(&hdev->pend_le_conns) &&
+ list_empty(&hdev->pend_le_reports)) {
+ /* If there is no pending LE connections or devices
+ * to be scanned for, we should stop the background
+ * scanning.
*/
/* If controller is not scanning we are done. */
@@ -5398,3 +5665,52 @@ void hci_update_background_scan(struct hci_dev *hdev)
if (err)
BT_ERR("Failed to run HCI request: err %d", err);
}
+
+static bool disconnected_whitelist_entries(struct hci_dev *hdev)
+{
+ struct bdaddr_list *b;
+
+ list_for_each_entry(b, &hdev->whitelist, list) {
+ struct hci_conn *conn;
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr);
+ if (!conn)
+ return true;
+
+ if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG)
+ return true;
+ }
+
+ return false;
+}
+
+void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req)
+{
+ u8 scan;
+
+ if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+ return;
+
+ if (!hdev_is_powered(hdev))
+ return;
+
+ if (mgmt_powering_down(hdev))
+ return;
+
+ if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) ||
+ disconnected_whitelist_entries(hdev))
+ scan = SCAN_PAGE;
+ else
+ scan = SCAN_DISABLED;
+
+ if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE))
+ return;
+
+ if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
+ scan |= SCAN_INQUIRY;
+
+ if (req)
+ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ else
+ hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 640c54ec1bd2..8b0a2a6de419 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -32,6 +32,7 @@
#include "a2mp.h"
#include "amp.h"
+#include "smp.h"
/* Handle HCI Event packets */
@@ -100,12 +101,8 @@ static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
- if (conn) {
- if (rp->role)
- conn->link_mode &= ~HCI_LM_MASTER;
- else
- conn->link_mode |= HCI_LM_MASTER;
- }
+ if (conn)
+ conn->role = rp->role;
hci_dev_unlock(hdev);
}
@@ -174,12 +171,14 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ if (status)
+ return;
+
sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_LINK_POLICY);
if (!sent)
return;
- if (!status)
- hdev->link_policy = get_unaligned_le16(sent);
+ hdev->link_policy = get_unaligned_le16(sent);
}
static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
@@ -269,28 +268,30 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
+ __u8 param;
void *sent;
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ if (status)
+ return;
+
sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_ENCRYPT_MODE);
if (!sent)
return;
- if (!status) {
- __u8 param = *((__u8 *) sent);
+ param = *((__u8 *) sent);
- if (param)
- set_bit(HCI_ENCRYPT, &hdev->flags);
- else
- clear_bit(HCI_ENCRYPT, &hdev->flags);
- }
+ if (param)
+ set_bit(HCI_ENCRYPT, &hdev->flags);
+ else
+ clear_bit(HCI_ENCRYPT, &hdev->flags);
}
static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
{
- __u8 param, status = *((__u8 *) skb->data);
- int old_pscan, old_iscan;
+ __u8 status = *((__u8 *) skb->data);
+ __u8 param;
void *sent;
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -304,32 +305,19 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
if (status) {
- mgmt_write_scan_failed(hdev, param, status);
hdev->discov_timeout = 0;
goto done;
}
- /* We need to ensure that we set this back on if someone changed
- * the scan mode through a raw HCI socket.
- */
- set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
-
- old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags);
- old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags);
-
- if (param & SCAN_INQUIRY) {
+ if (param & SCAN_INQUIRY)
set_bit(HCI_ISCAN, &hdev->flags);
- if (!old_iscan)
- mgmt_discoverable(hdev, 1);
- } else if (old_iscan)
- mgmt_discoverable(hdev, 0);
+ else
+ clear_bit(HCI_ISCAN, &hdev->flags);
- if (param & SCAN_PAGE) {
+ if (param & SCAN_PAGE)
set_bit(HCI_PSCAN, &hdev->flags);
- if (!old_pscan)
- mgmt_connectable(hdev, 1);
- } else if (old_pscan)
- mgmt_connectable(hdev, 0);
+ else
+ clear_bit(HCI_PSCAN, &hdev->flags);
done:
hci_dev_unlock(hdev);
@@ -601,8 +589,10 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status)
- hdev->flow_ctl_mode = rp->mode;
+ if (rp->status)
+ return;
+
+ hdev->flow_ctl_mode = rp->mode;
}
static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
@@ -637,8 +627,14 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status)
+ if (rp->status)
+ return;
+
+ if (test_bit(HCI_INIT, &hdev->flags))
bacpy(&hdev->bdaddr, &rp->bdaddr);
+
+ if (test_bit(HCI_SETUP, &hdev->dev_flags))
+ bacpy(&hdev->setup_addr, &rp->bdaddr);
}
static void hci_cc_read_page_scan_activity(struct hci_dev *hdev,
@@ -648,7 +644,10 @@ static void hci_cc_read_page_scan_activity(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) {
+ if (rp->status)
+ return;
+
+ if (test_bit(HCI_INIT, &hdev->flags)) {
hdev->page_scan_interval = __le16_to_cpu(rp->interval);
hdev->page_scan_window = __le16_to_cpu(rp->window);
}
@@ -680,7 +679,10 @@ static void hci_cc_read_page_scan_type(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (test_bit(HCI_INIT, &hdev->flags) && !rp->status)
+ if (rp->status)
+ return;
+
+ if (test_bit(HCI_INIT, &hdev->flags))
hdev->page_scan_type = rp->type;
}
@@ -720,6 +722,41 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev,
hdev->block_cnt, hdev->block_len);
}
+static void hci_cc_read_clock(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_rp_read_clock *rp = (void *) skb->data;
+ struct hci_cp_read_clock *cp;
+ struct hci_conn *conn;
+
+ BT_DBG("%s", hdev->name);
+
+ if (skb->len < sizeof(*rp))
+ return;
+
+ if (rp->status)
+ return;
+
+ hci_dev_lock(hdev);
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_READ_CLOCK);
+ if (!cp)
+ goto unlock;
+
+ if (cp->which == 0x00) {
+ hdev->clock = le32_to_cpu(rp->clock);
+ goto unlock;
+ }
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+ if (conn) {
+ conn->clock = le32_to_cpu(rp->clock);
+ conn->clock_accuracy = le16_to_cpu(rp->accuracy);
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -789,8 +826,10 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status)
- hdev->inq_tx_power = rp->tx_power;
+ if (rp->status)
+ return;
+
+ hdev->inq_tx_power = rp->tx_power;
}
static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -861,8 +900,10 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status)
- memcpy(hdev->le_features, rp->features, 8);
+ if (rp->status)
+ return;
+
+ memcpy(hdev->le_features, rp->features, 8);
}
static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
@@ -872,8 +913,10 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status)
- hdev->adv_tx_power = rp->tx_power;
+ if (rp->status)
+ return;
+
+ hdev->adv_tx_power = rp->tx_power;
}
static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -973,14 +1016,16 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ if (status)
+ return;
+
sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_RANDOM_ADDR);
if (!sent)
return;
hci_dev_lock(hdev);
- if (!status)
- bacpy(&hdev->random_addr, sent);
+ bacpy(&hdev->random_addr, sent);
hci_dev_unlock(hdev);
}
@@ -991,11 +1036,11 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_ENABLE);
- if (!sent)
+ if (status)
return;
- if (status)
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_ENABLE);
+ if (!sent)
return;
hci_dev_lock(hdev);
@@ -1006,15 +1051,17 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
if (*sent) {
struct hci_conn *conn;
+ set_bit(HCI_LE_ADV, &hdev->dev_flags);
+
conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
if (conn)
queue_delayed_work(hdev->workqueue,
&conn->le_conn_timeout,
- HCI_LE_CONN_TIMEOUT);
+ conn->conn_timeout);
+ } else {
+ clear_bit(HCI_LE_ADV, &hdev->dev_flags);
}
- mgmt_advertising(hdev, *sent);
-
hci_dev_unlock(hdev);
}
@@ -1025,14 +1072,16 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ if (status)
+ return;
+
cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_PARAM);
if (!cp)
return;
hci_dev_lock(hdev);
- if (!status)
- hdev->le_scan_type = cp->type;
+ hdev->le_scan_type = cp->type;
hci_dev_unlock(hdev);
}
@@ -1053,13 +1102,15 @@ static void clear_pending_adv_report(struct hci_dev *hdev)
}
static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
- u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
+ u8 bdaddr_type, s8 rssi, u32 flags,
+ u8 *data, u8 len)
{
struct discovery_state *d = &hdev->discovery;
bacpy(&d->last_adv_addr, bdaddr);
d->last_adv_addr_type = bdaddr_type;
d->last_adv_rssi = rssi;
+ d->last_adv_flags = flags;
memcpy(d->last_adv_data, data, len);
d->last_adv_data_len = len;
}
@@ -1072,11 +1123,11 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_ENABLE);
- if (!cp)
+ if (status)
return;
- if (status)
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_ENABLE);
+ if (!cp)
return;
switch (cp->enable) {
@@ -1096,7 +1147,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
d->last_adv_addr_type, NULL,
- d->last_adv_rssi, 0, 1,
+ d->last_adv_rssi, d->last_adv_flags,
d->last_adv_data,
d->last_adv_data_len, NULL, 0);
}
@@ -1107,13 +1158,21 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
cancel_delayed_work(&hdev->le_scan_disable);
clear_bit(HCI_LE_SCAN, &hdev->dev_flags);
+
/* The HCI_LE_SCAN_INTERRUPTED flag indicates that we
* interrupted scanning due to a connect request. Mark
- * therefore discovery as stopped.
+ * therefore discovery as stopped. If this was not
+ * because of a connect request advertising might have
+ * been disabled because of active scanning, so
+ * re-enable it again if necessary.
*/
if (test_and_clear_bit(HCI_LE_SCAN_INTERRUPTED,
&hdev->dev_flags))
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ else if (!test_bit(HCI_LE_ADV, &hdev->dev_flags) &&
+ hdev->discovery.state == DISCOVERY_FINDING)
+ mgmt_reenable_advertising(hdev);
+
break;
default:
@@ -1129,8 +1188,10 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size);
- if (!rp->status)
- hdev->le_white_list_size = rp->size;
+ if (rp->status)
+ return;
+
+ hdev->le_white_list_size = rp->size;
}
static void hci_cc_le_clear_white_list(struct hci_dev *hdev,
@@ -1140,8 +1201,10 @@ static void hci_cc_le_clear_white_list(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- if (!status)
- hci_white_list_clear(hdev);
+ if (status)
+ return;
+
+ hci_bdaddr_list_clear(&hdev->le_white_list);
}
static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
@@ -1152,12 +1215,15 @@ static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ if (status)
+ return;
+
sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_WHITE_LIST);
if (!sent)
return;
- if (!status)
- hci_white_list_add(hdev, &sent->bdaddr, sent->bdaddr_type);
+ hci_bdaddr_list_add(&hdev->le_white_list, &sent->bdaddr,
+ sent->bdaddr_type);
}
static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
@@ -1168,12 +1234,15 @@ static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ if (status)
+ return;
+
sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_WHITE_LIST);
if (!sent)
return;
- if (!status)
- hci_white_list_del(hdev, &sent->bdaddr, sent->bdaddr_type);
+ hci_bdaddr_list_del(&hdev->le_white_list, &sent->bdaddr,
+ sent->bdaddr_type);
}
static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
@@ -1183,8 +1252,10 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status)
- memcpy(hdev->le_states, rp->le_states, 8);
+ if (rp->status)
+ return;
+
+ memcpy(hdev->le_states, rp->le_states, 8);
}
static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
@@ -1195,25 +1266,26 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, status);
+ if (status)
+ return;
+
sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED);
if (!sent)
return;
- if (!status) {
- if (sent->le) {
- hdev->features[1][0] |= LMP_HOST_LE;
- set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
- } else {
- hdev->features[1][0] &= ~LMP_HOST_LE;
- clear_bit(HCI_LE_ENABLED, &hdev->dev_flags);
- clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
- }
-
- if (sent->simul)
- hdev->features[1][0] |= LMP_HOST_LE_BREDR;
- else
- hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
+ if (sent->le) {
+ hdev->features[1][0] |= LMP_HOST_LE;
+ set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
+ } else {
+ hdev->features[1][0] &= ~LMP_HOST_LE;
+ clear_bit(HCI_LE_ENABLED, &hdev->dev_flags);
+ clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
}
+
+ if (sent->simul)
+ hdev->features[1][0] |= LMP_HOST_LE_BREDR;
+ else
+ hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
}
static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1342,11 +1414,9 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
}
} else {
if (!conn) {
- conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr);
- if (conn) {
- conn->out = true;
- conn->link_mode |= HCI_LM_MASTER;
- } else
+ conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr,
+ HCI_ROLE_MASTER);
+ if (!conn)
BT_ERR("No memory for new connection");
}
}
@@ -1575,6 +1645,8 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status)
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
struct hci_cp_auth_requested auth_cp;
+ set_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags);
+
auth_cp.handle = __cpu_to_le16(conn->handle);
hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
sizeof(auth_cp), &auth_cp);
@@ -1835,7 +1907,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
if (cp->filter_policy == HCI_LE_USE_PEER_ADDR)
queue_delayed_work(conn->hdev->workqueue,
&conn->le_conn_timeout,
- HCI_LE_CONN_TIMEOUT);
+ conn->conn_timeout);
unlock:
hci_dev_unlock(hdev);
@@ -1929,7 +2001,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
for (; num_rsp; num_rsp--, info++) {
- bool name_known, ssp;
+ u32 flags;
bacpy(&data.bdaddr, &info->bdaddr);
data.pscan_rep_mode = info->pscan_rep_mode;
@@ -1940,10 +2012,10 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
data.rssi = 0x00;
data.ssp_mode = 0x00;
- name_known = hci_inquiry_cache_update(hdev, &data, false, &ssp);
+ flags = hci_inquiry_cache_update(hdev, &data, false);
+
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
- info->dev_class, 0, !name_known, ssp, NULL,
- 0, NULL, 0);
+ info->dev_class, 0, flags, NULL, 0, NULL, 0);
}
hci_dev_unlock(hdev);
@@ -1988,10 +2060,10 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_conn_add_sysfs(conn);
if (test_bit(HCI_AUTH, &hdev->flags))
- conn->link_mode |= HCI_LM_AUTH;
+ set_bit(HCI_CONN_AUTH, &conn->flags);
if (test_bit(HCI_ENCRYPT, &hdev->flags))
- conn->link_mode |= HCI_LM_ENCRYPT;
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
/* Get remote features */
if (conn->type == ACL_LINK) {
@@ -1999,6 +2071,8 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
cp.handle = ev->handle;
hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES,
sizeof(cp), &cp);
+
+ hci_update_page_scan(hdev, NULL);
}
/* Set packet type for incoming connection */
@@ -2031,10 +2105,21 @@ unlock:
hci_conn_check_pending(hdev);
}
+static void hci_reject_conn(struct hci_dev *hdev, bdaddr_t *bdaddr)
+{
+ struct hci_cp_reject_conn_req cp;
+
+ bacpy(&cp.bdaddr, bdaddr);
+ cp.reason = HCI_ERROR_REJ_BAD_ADDR;
+ hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp);
+}
+
static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_conn_request *ev = (void *) skb->data;
int mask = hdev->link_mode;
+ struct inquiry_entry *ie;
+ struct hci_conn *conn;
__u8 flags = 0;
BT_DBG("%s bdaddr %pMR type 0x%x", hdev->name, &ev->bdaddr,
@@ -2043,73 +2128,79 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type,
&flags);
- if ((mask & HCI_LM_ACCEPT) &&
- !hci_blacklist_lookup(hdev, &ev->bdaddr, BDADDR_BREDR)) {
- /* Connection accepted */
- struct inquiry_entry *ie;
- struct hci_conn *conn;
+ if (!(mask & HCI_LM_ACCEPT)) {
+ hci_reject_conn(hdev, &ev->bdaddr);
+ return;
+ }
- hci_dev_lock(hdev);
+ if (hci_bdaddr_list_lookup(&hdev->blacklist, &ev->bdaddr,
+ BDADDR_BREDR)) {
+ hci_reject_conn(hdev, &ev->bdaddr);
+ return;
+ }
- ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
- if (ie)
- memcpy(ie->data.dev_class, ev->dev_class, 3);
+ if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags) &&
+ !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr,
+ BDADDR_BREDR)) {
+ hci_reject_conn(hdev, &ev->bdaddr);
+ return;
+ }
+
+ /* Connection accepted */
+
+ hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_ba(hdev, ev->link_type,
- &ev->bdaddr);
+ ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
+ if (ie)
+ memcpy(ie->data.dev_class, ev->dev_class, 3);
+
+ conn = hci_conn_hash_lookup_ba(hdev, ev->link_type,
+ &ev->bdaddr);
+ if (!conn) {
+ conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
+ HCI_ROLE_SLAVE);
if (!conn) {
- conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr);
- if (!conn) {
- BT_ERR("No memory for new connection");
- hci_dev_unlock(hdev);
- return;
- }
+ BT_ERR("No memory for new connection");
+ hci_dev_unlock(hdev);
+ return;
}
+ }
- memcpy(conn->dev_class, ev->dev_class, 3);
+ memcpy(conn->dev_class, ev->dev_class, 3);
- hci_dev_unlock(hdev);
+ hci_dev_unlock(hdev);
- if (ev->link_type == ACL_LINK ||
- (!(flags & HCI_PROTO_DEFER) && !lmp_esco_capable(hdev))) {
- struct hci_cp_accept_conn_req cp;
- conn->state = BT_CONNECT;
+ if (ev->link_type == ACL_LINK ||
+ (!(flags & HCI_PROTO_DEFER) && !lmp_esco_capable(hdev))) {
+ struct hci_cp_accept_conn_req cp;
+ conn->state = BT_CONNECT;
- bacpy(&cp.bdaddr, &ev->bdaddr);
+ bacpy(&cp.bdaddr, &ev->bdaddr);
- if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
- cp.role = 0x00; /* Become master */
- else
- cp.role = 0x01; /* Remain slave */
+ if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
+ cp.role = 0x00; /* Become master */
+ else
+ cp.role = 0x01; /* Remain slave */
- hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp),
- &cp);
- } else if (!(flags & HCI_PROTO_DEFER)) {
- struct hci_cp_accept_sync_conn_req cp;
- conn->state = BT_CONNECT;
+ hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
+ } else if (!(flags & HCI_PROTO_DEFER)) {
+ struct hci_cp_accept_sync_conn_req cp;
+ conn->state = BT_CONNECT;
- bacpy(&cp.bdaddr, &ev->bdaddr);
- cp.pkt_type = cpu_to_le16(conn->pkt_type);
+ bacpy(&cp.bdaddr, &ev->bdaddr);
+ cp.pkt_type = cpu_to_le16(conn->pkt_type);
- cp.tx_bandwidth = cpu_to_le32(0x00001f40);
- cp.rx_bandwidth = cpu_to_le32(0x00001f40);
- cp.max_latency = cpu_to_le16(0xffff);
- cp.content_format = cpu_to_le16(hdev->voice_setting);
- cp.retrans_effort = 0xff;
+ cp.tx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.rx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.max_latency = cpu_to_le16(0xffff);
+ cp.content_format = cpu_to_le16(hdev->voice_setting);
+ cp.retrans_effort = 0xff;
- hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
- sizeof(cp), &cp);
- } else {
- conn->state = BT_CONNECT2;
- hci_proto_connect_cfm(conn, 0);
- }
+ hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, sizeof(cp),
+ &cp);
} else {
- /* Connection rejected */
- struct hci_cp_reject_conn_req cp;
-
- bacpy(&cp.bdaddr, &ev->bdaddr);
- cp.reason = HCI_ERROR_REJ_BAD_ADDR;
- hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp);
+ conn->state = BT_CONNECT2;
+ hci_proto_connect_cfm(conn, 0);
}
}
@@ -2158,8 +2249,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
mgmt_device_disconnected(hdev, &conn->dst, conn->type, conn->dst_type,
reason, mgmt_connected);
- if (conn->type == ACL_LINK && conn->flush_key)
- hci_remove_link_key(hdev, &conn->dst);
+ if (conn->type == ACL_LINK) {
+ if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags))
+ hci_remove_link_key(hdev, &conn->dst);
+
+ hci_update_page_scan(hdev, NULL);
+ }
params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
if (params) {
@@ -2169,8 +2264,11 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
/* Fall through */
+ case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- hci_pend_le_conn_add(hdev, &conn->dst, conn->dst_type);
+ list_del_init(&params->action);
+ list_add(&params->action, &hdev->pend_le_conns);
+ hci_update_background_scan(hdev);
break;
default:
@@ -2218,12 +2316,11 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
BT_INFO("re-auth of legacy device is not possible.");
} else {
- conn->link_mode |= HCI_LM_AUTH;
+ set_bit(HCI_CONN_AUTH, &conn->flags);
conn->sec_level = conn->pending_sec_level;
}
} else {
- mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type,
- ev->status);
+ mgmt_auth_failed(conn, ev->status);
}
clear_bit(HCI_CONN_AUTH_PEND, &conn->flags);
@@ -2297,6 +2394,9 @@ check_auth:
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
struct hci_cp_auth_requested cp;
+
+ set_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags);
+
cp.handle = __cpu_to_le16(conn->handle);
hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
}
@@ -2321,23 +2421,29 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!ev->status) {
if (ev->encrypt) {
/* Encryption implies authentication */
- conn->link_mode |= HCI_LM_AUTH;
- conn->link_mode |= HCI_LM_ENCRYPT;
+ set_bit(HCI_CONN_AUTH, &conn->flags);
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
conn->sec_level = conn->pending_sec_level;
/* P-256 authentication key implies FIPS */
if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256)
- conn->link_mode |= HCI_LM_FIPS;
+ set_bit(HCI_CONN_FIPS, &conn->flags);
if ((conn->type == ACL_LINK && ev->encrypt == 0x02) ||
conn->type == LE_LINK)
set_bit(HCI_CONN_AES_CCM, &conn->flags);
} else {
- conn->link_mode &= ~HCI_LM_ENCRYPT;
+ clear_bit(HCI_CONN_ENCRYPT, &conn->flags);
clear_bit(HCI_CONN_AES_CCM, &conn->flags);
}
}
+ /* We should disregard the current RPA and generate a new one
+ * whenever the encryption procedure fails.
+ */
+ if (ev->status && conn->type == LE_LINK)
+ set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
if (ev->status && conn->state == BT_CONNECTED) {
@@ -2384,7 +2490,7 @@ static void hci_change_link_key_complete_evt(struct hci_dev *hdev,
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
if (conn) {
if (!ev->status)
- conn->link_mode |= HCI_LM_SECURE;
+ set_bit(HCI_CONN_SECURE, &conn->flags);
clear_bit(HCI_CONN_AUTH_PEND, &conn->flags);
@@ -2595,6 +2701,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_read_local_amp_info(hdev, skb);
break;
+ case HCI_OP_READ_CLOCK:
+ hci_cc_read_clock(hdev, skb);
+ break;
+
case HCI_OP_READ_LOCAL_AMP_ASSOC:
hci_cc_read_local_amp_assoc(hdev, skb);
break;
@@ -2709,7 +2819,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
if (opcode != HCI_OP_NOP)
- del_timer(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->cmd_timer);
hci_req_cmd_complete(hdev, opcode, status);
@@ -2800,7 +2910,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
if (opcode != HCI_OP_NOP)
- del_timer(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->cmd_timer);
if (ev->status ||
(hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
@@ -2824,12 +2934,8 @@ static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
if (conn) {
- if (!ev->status) {
- if (ev->role)
- conn->link_mode &= ~HCI_LM_MASTER;
- else
- conn->link_mode |= HCI_LM_MASTER;
- }
+ if (!ev->status)
+ conn->role = ev->role;
clear_bit(HCI_CONN_RSWITCH_PEND, &conn->flags);
@@ -3023,10 +3129,11 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_conn_drop(conn);
}
- if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags))
+ if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) &&
+ !test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags)) {
hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY,
sizeof(ev->bdaddr), &ev->bdaddr);
- else if (test_bit(HCI_MGMT, &hdev->dev_flags)) {
+ } else if (test_bit(HCI_MGMT, &hdev->dev_flags)) {
u8 secure;
if (conn->pending_sec_level == BT_SECURITY_HIGH)
@@ -3065,12 +3172,6 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s found key type %u for %pMR", hdev->name, key->type,
&ev->bdaddr);
- if (!test_bit(HCI_DEBUG_KEYS, &hdev->dev_flags) &&
- key->type == HCI_LK_DEBUG_COMBINATION) {
- BT_DBG("%s ignoring debug key", hdev->name);
- goto not_found;
- }
-
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
if (conn) {
if ((key->type == HCI_LK_UNAUTH_COMBINATION_P192 ||
@@ -3110,6 +3211,8 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_link_key_notify *ev = (void *) skb->data;
struct hci_conn *conn;
+ struct link_key *key;
+ bool persistent;
u8 pin_len = 0;
BT_DBG("%s", hdev->name);
@@ -3128,10 +3231,33 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_conn_drop(conn);
}
- if (test_bit(HCI_MGMT, &hdev->dev_flags))
- hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key,
- ev->key_type, pin_len);
+ if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+ goto unlock;
+
+ key = hci_add_link_key(hdev, conn, &ev->bdaddr, ev->link_key,
+ ev->key_type, pin_len, &persistent);
+ if (!key)
+ goto unlock;
+
+ mgmt_new_link_key(hdev, key, persistent);
+ /* Keep debug keys around only if the HCI_KEEP_DEBUG_KEYS flag
+ * is set. If it's not set simply remove the key from the kernel
+ * list (we've still notified user space about it but with
+ * store_hint being 0).
+ */
+ if (key->type == HCI_LK_DEBUG_COMBINATION &&
+ !test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) {
+ list_del(&key->list);
+ kfree(key);
+ } else if (conn) {
+ if (persistent)
+ clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags);
+ else
+ set_bit(HCI_CONN_FLUSH_KEY, &conn->flags);
+ }
+
+unlock:
hci_dev_unlock(hdev);
}
@@ -3197,7 +3323,6 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
{
struct inquiry_data data;
int num_rsp = *((__u8 *) skb->data);
- bool name_known, ssp;
BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
@@ -3214,6 +3339,8 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
info = (void *) (skb->data + 1);
for (; num_rsp; num_rsp--, info++) {
+ u32 flags;
+
bacpy(&data.bdaddr, &info->bdaddr);
data.pscan_rep_mode = info->pscan_rep_mode;
data.pscan_period_mode = info->pscan_period_mode;
@@ -3223,16 +3350,18 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
data.rssi = info->rssi;
data.ssp_mode = 0x00;
- name_known = hci_inquiry_cache_update(hdev, &data,
- false, &ssp);
+ flags = hci_inquiry_cache_update(hdev, &data, false);
+
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi,
- !name_known, ssp, NULL, 0, NULL, 0);
+ flags, NULL, 0, NULL, 0);
}
} else {
struct inquiry_info_with_rssi *info = (void *) (skb->data + 1);
for (; num_rsp; num_rsp--, info++) {
+ u32 flags;
+
bacpy(&data.bdaddr, &info->bdaddr);
data.pscan_rep_mode = info->pscan_rep_mode;
data.pscan_period_mode = info->pscan_period_mode;
@@ -3241,11 +3370,12 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
data.clock_offset = info->clock_offset;
data.rssi = info->rssi;
data.ssp_mode = 0x00;
- name_known = hci_inquiry_cache_update(hdev, &data,
- false, &ssp);
+
+ flags = hci_inquiry_cache_update(hdev, &data, false);
+
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi,
- !name_known, ssp, NULL, 0, NULL, 0);
+ flags, NULL, 0, NULL, 0);
}
}
@@ -3348,6 +3478,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
hci_conn_add_sysfs(conn);
break;
+ case 0x10: /* Connection Accept Timeout */
case 0x0d: /* Connection Rejected due to Limited Resources */
case 0x11: /* Unsupported Feature or Parameter Value */
case 0x1c: /* SCO interval rejected */
@@ -3411,7 +3542,8 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
hci_dev_lock(hdev);
for (; num_rsp; num_rsp--, info++) {
- bool name_known, ssp;
+ u32 flags;
+ bool name_known;
bacpy(&data.bdaddr, &info->bdaddr);
data.pscan_rep_mode = info->pscan_rep_mode;
@@ -3429,12 +3561,13 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
else
name_known = true;
- name_known = hci_inquiry_cache_update(hdev, &data, name_known,
- &ssp);
+ flags = hci_inquiry_cache_update(hdev, &data, name_known);
+
eir_len = eir_get_length(info->data, sizeof(info->data));
+
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
- info->dev_class, info->rssi, !name_known,
- ssp, info->data, eir_len, NULL, 0);
+ info->dev_class, info->rssi,
+ flags, info->data, eir_len, NULL, 0);
}
hci_dev_unlock(hdev);
@@ -3526,7 +3659,11 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!test_bit(HCI_MGMT, &hdev->dev_flags))
goto unlock;
- if (test_bit(HCI_PAIRABLE, &hdev->dev_flags) ||
+ /* Allow pairing if we're pairable, the initiators of the
+ * pairing or if the remote is not requesting bonding.
+ */
+ if (test_bit(HCI_BONDABLE, &hdev->dev_flags) ||
+ test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags) ||
(conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) {
struct hci_cp_io_capability_reply cp;
@@ -3538,23 +3675,24 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
/* If we are initiators, there is no remote information yet */
if (conn->remote_auth == 0xff) {
- cp.authentication = conn->auth_type;
-
/* Request MITM protection if our IO caps allow it
* except for the no-bonding case.
- * conn->auth_type is not updated here since
- * that might cause the user confirmation to be
- * rejected in case the remote doesn't have the
- * IO capabilities for MITM.
*/
if (conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
- cp.authentication != HCI_AT_NO_BONDING)
- cp.authentication |= 0x01;
+ conn->auth_type != HCI_AT_NO_BONDING)
+ conn->auth_type |= 0x01;
} else {
conn->auth_type = hci_get_auth_req(conn);
- cp.authentication = conn->auth_type;
}
+ /* If we're not bondable, force one of the non-bondable
+ * authentication requirement values.
+ */
+ if (!test_bit(HCI_BONDABLE, &hdev->dev_flags))
+ conn->auth_type &= HCI_AT_NO_BONDING_MITM;
+
+ cp.authentication = conn->auth_type;
+
if (hci_find_remote_oob_data(hdev, &conn->dst) &&
(conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)))
cp.oob_data = 0x01;
@@ -3621,9 +3759,12 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
rem_mitm = (conn->remote_auth & 0x01);
/* If we require MITM but the remote device can't provide that
- * (it has NoInputNoOutput) then reject the confirmation request
+ * (it has NoInputNoOutput) then reject the confirmation
+ * request. We check the security level here since it doesn't
+ * necessarily match conn->auth_type.
*/
- if (loc_mitm && conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
+ if (conn->pending_sec_level > BT_SECURITY_MEDIUM &&
+ conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
BT_DBG("Rejecting request: remote device can't provide MITM");
hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY,
sizeof(ev->bdaddr), &ev->bdaddr);
@@ -3637,9 +3778,11 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
/* If we're not the initiators request authorization to
* proceed from user space (mgmt_user_confirm with
* confirm_hint set to 1). The exception is if neither
- * side had MITM in which case we do auto-accept.
+ * side had MITM or if the local IO capability is
+ * NoInputNoOutput, in which case we do auto-accept
*/
if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) &&
+ conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
(loc_mitm || rem_mitm)) {
BT_DBG("Confirming auto-accept as acceptor");
confirm_hint = 1;
@@ -3753,14 +3896,16 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
if (!conn)
goto unlock;
+ /* Reset the authentication requirement to unknown */
+ conn->remote_auth = 0xff;
+
/* To avoid duplicate auth_failed events to user space we check
* the HCI_CONN_AUTH_PEND flag which will be set if we
* initiated the authentication. A traditional auth_complete
* event gets always produced as initiator and is also mapped to
* the mgmt_auth_failed event */
if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && ev->status)
- mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type,
- ev->status);
+ mgmt_auth_failed(conn, ev->status);
hci_conn_drop(conn);
@@ -3967,16 +4112,23 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_le_conn_complete *ev = (void *) skb->data;
+ struct hci_conn_params *params;
struct hci_conn *conn;
struct smp_irk *irk;
+ u8 addr_type;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
hci_dev_lock(hdev);
+ /* All controllers implicitly stop advertising in the event of a
+ * connection, so ensure that the state bit is cleared.
+ */
+ clear_bit(HCI_LE_ADV, &hdev->dev_flags);
+
conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
if (!conn) {
- conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
+ conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role);
if (!conn) {
BT_ERR("No memory for new connection");
goto unlock;
@@ -3984,11 +4136,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->dst_type = ev->bdaddr_type;
- if (ev->role == LE_CONN_ROLE_MASTER) {
- conn->out = true;
- conn->link_mode |= HCI_LM_MASTER;
- }
-
/* If we didn't have a hci_conn object previously
* but we're in master role this must be something
* initiated using a white list. Since white list based
@@ -4025,6 +4172,14 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->init_addr_type = ev->bdaddr_type;
bacpy(&conn->init_addr, &ev->bdaddr);
+
+ /* For incoming connections, set the default minimum
+ * and maximum connection interval. They will be used
+ * to check if the parameters are in range and if not
+ * trigger the connection update procedure.
+ */
+ conn->le_conn_min_interval = hdev->le_conn_min_interval;
+ conn->le_conn_max_interval = hdev->le_conn_max_interval;
}
/* Lookup the identity address from the stored connection
@@ -4047,6 +4202,17 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
goto unlock;
}
+ if (conn->dst_type == ADDR_LE_DEV_PUBLIC)
+ addr_type = BDADDR_LE_PUBLIC;
+ else
+ addr_type = BDADDR_LE_RANDOM;
+
+ /* Drop the connection if the device is blocked */
+ if (hci_bdaddr_list_lookup(&hdev->blacklist, &conn->dst, addr_type)) {
+ hci_conn_drop(conn);
+ goto unlock;
+ }
+
if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
mgmt_device_connected(hdev, &conn->dst, conn->type,
conn->dst_type, 0, NULL, 0, NULL);
@@ -4055,42 +4221,115 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
- if (test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags))
- set_bit(HCI_CONN_6LOWPAN, &conn->flags);
+ conn->le_conn_interval = le16_to_cpu(ev->interval);
+ conn->le_conn_latency = le16_to_cpu(ev->latency);
+ conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout);
hci_conn_add_sysfs(conn);
hci_proto_connect_cfm(conn, ev->status);
- hci_pend_le_conn_del(hdev, &conn->dst, conn->dst_type);
+ params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
+ conn->dst_type);
+ if (params) {
+ list_del_init(&params->action);
+ if (params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+ params->conn = NULL;
+ }
+ }
unlock:
+ hci_update_background_scan(hdev);
+ hci_dev_unlock(hdev);
+}
+
+static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_ev_le_conn_update_complete *ev = (void *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
+
+ if (ev->status)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+ if (conn) {
+ conn->le_conn_interval = le16_to_cpu(ev->interval);
+ conn->le_conn_latency = le16_to_cpu(ev->latency);
+ conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout);
+ }
+
hci_dev_unlock(hdev);
}
/* This function requires the caller holds hdev->lock */
static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr,
- u8 addr_type)
+ u8 addr_type, u8 adv_type)
{
struct hci_conn *conn;
- struct smp_irk *irk;
+ struct hci_conn_params *params;
+
+ /* If the event is not connectable don't proceed further */
+ if (adv_type != LE_ADV_IND && adv_type != LE_ADV_DIRECT_IND)
+ return;
+
+ /* Ignore if the device is blocked */
+ if (hci_bdaddr_list_lookup(&hdev->blacklist, addr, addr_type))
+ return;
- /* If this is a resolvable address, we should resolve it and then
- * update address and address type variables.
+ /* Most controller will fail if we try to create new connections
+ * while we have an existing one in slave role.
*/
- irk = hci_get_irk(hdev, addr, addr_type);
- if (irk) {
- addr = &irk->bdaddr;
- addr_type = irk->addr_type;
- }
+ if (hdev->conn_hash.le_num_slave > 0)
+ return;
- if (!hci_pend_le_conn_lookup(hdev, addr, addr_type))
+ /* If we're not connectable only connect devices that we have in
+ * our pend_le_conns list.
+ */
+ params = hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ addr, addr_type);
+ if (!params)
return;
+ switch (params->auto_connect) {
+ case HCI_AUTO_CONN_DIRECT:
+ /* Only devices advertising with ADV_DIRECT_IND are
+ * triggering a connection attempt. This is allowing
+ * incoming connections from slave devices.
+ */
+ if (adv_type != LE_ADV_DIRECT_IND)
+ return;
+ break;
+ case HCI_AUTO_CONN_ALWAYS:
+ /* Devices advertising with ADV_IND or ADV_DIRECT_IND
+ * are triggering a connection attempt. This means
+ * that incoming connectioms from slave device are
+ * accepted and also outgoing connections to slave
+ * devices are established when found.
+ */
+ break;
+ default:
+ return;
+ }
+
conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
- HCI_AT_NO_BONDING);
- if (!IS_ERR(conn))
+ HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER);
+ if (!IS_ERR(conn)) {
+ /* Store the pointer since we don't really have any
+ * other owner of the object besides the params that
+ * triggered it. This way we can abort the connection if
+ * the parameters get removed and keep the reference
+ * count consistent once the connection is established.
+ */
+ params->conn = hci_conn_get(conn);
return;
+ }
switch (PTR_ERR(conn)) {
case -EBUSY:
@@ -4109,15 +4348,62 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
{
struct discovery_state *d = &hdev->discovery;
+ struct smp_irk *irk;
bool match;
+ u32 flags;
+
+ /* Check if we need to convert to identity address */
+ irk = hci_get_irk(hdev, bdaddr, bdaddr_type);
+ if (irk) {
+ bdaddr = &irk->bdaddr;
+ bdaddr_type = irk->addr_type;
+ }
- /* Passive scanning shouldn't trigger any device found events */
+ /* Check if we have been requested to connect to this device */
+ check_pending_le_conn(hdev, bdaddr, bdaddr_type, type);
+
+ /* Passive scanning shouldn't trigger any device found events,
+ * except for devices marked as CONN_REPORT for which we do send
+ * device found events.
+ */
if (hdev->le_scan_type == LE_SCAN_PASSIVE) {
- if (type == LE_ADV_IND || type == LE_ADV_DIRECT_IND)
- check_pending_le_conn(hdev, bdaddr, bdaddr_type);
+ if (type == LE_ADV_DIRECT_IND)
+ return;
+
+ if (!hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ bdaddr, bdaddr_type))
+ return;
+
+ if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND)
+ flags = MGMT_DEV_FOUND_NOT_CONNECTABLE;
+ else
+ flags = 0;
+ mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
+ rssi, flags, data, len, NULL, 0);
return;
}
+ /* When receiving non-connectable or scannable undirected
+ * advertising reports, this means that the remote device is
+ * not connectable and then clearly indicate this in the
+ * device found event.
+ *
+ * When receiving a scan response, then there is no way to
+ * know if the remote device is connectable or not. However
+ * since scan responses are merged with a previously seen
+ * advertising report, the flags field from that report
+ * will be used.
+ *
+ * In the really unlikely case that a controller get confused
+ * and just sends a scan response event, then it is marked as
+ * not connectable as well.
+ */
+ if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND ||
+ type == LE_ADV_SCAN_RSP)
+ flags = MGMT_DEV_FOUND_NOT_CONNECTABLE;
+ else
+ flags = 0;
+
/* If there's nothing pending either store the data from this
* event or send an immediate device found event if the data
* should not be stored for later.
@@ -4128,12 +4414,12 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
store_pending_adv_report(hdev, bdaddr, bdaddr_type,
- rssi, data, len);
+ rssi, flags, data, len);
return;
}
mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
- rssi, 0, 1, data, len, NULL, 0);
+ rssi, flags, data, len, NULL, 0);
return;
}
@@ -4150,7 +4436,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
if (!match)
mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
d->last_adv_addr_type, NULL,
- d->last_adv_rssi, 0, 1,
+ d->last_adv_rssi, d->last_adv_flags,
d->last_adv_data,
d->last_adv_data_len, NULL, 0);
@@ -4159,7 +4445,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
store_pending_adv_report(hdev, bdaddr, bdaddr_type,
- rssi, data, len);
+ rssi, flags, data, len);
return;
}
@@ -4168,7 +4454,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
clear_pending_adv_report(hdev);
mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
- rssi, 0, 1, data, len, NULL, 0);
+ rssi, flags, data, len, NULL, 0);
return;
}
@@ -4177,8 +4463,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* sending a merged device found event.
*/
mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
- d->last_adv_addr_type, NULL, rssi, 0, 1, data, len,
- d->last_adv_data, d->last_adv_data_len);
+ d->last_adv_addr_type, NULL, rssi, d->last_adv_flags,
+ d->last_adv_data, d->last_adv_data_len, data, len);
clear_pending_adv_report(hdev);
}
@@ -4219,17 +4505,14 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (conn == NULL)
goto not_found;
- ltk = hci_find_ltk(hdev, ev->ediv, ev->rand, conn->out);
+ ltk = hci_find_ltk(hdev, ev->ediv, ev->rand, conn->role);
if (ltk == NULL)
goto not_found;
memcpy(cp.ltk, ltk->val, sizeof(ltk->val));
cp.handle = cpu_to_le16(conn->handle);
- if (ltk->authenticated)
- conn->pending_sec_level = BT_SECURITY_HIGH;
- else
- conn->pending_sec_level = BT_SECURITY_MEDIUM;
+ conn->pending_sec_level = smp_ltk_sec_level(ltk);
conn->enc_key_size = ltk->enc_size;
@@ -4241,9 +4524,12 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
* distribute the keys. Later, security can be re-established
* using a distributed LTK.
*/
- if (ltk->type == HCI_SMP_STK_SLAVE) {
+ if (ltk->type == SMP_STK) {
+ set_bit(HCI_CONN_STK_ENCRYPT, &conn->flags);
list_del(&ltk->list);
kfree(ltk);
+ } else {
+ clear_bit(HCI_CONN_STK_ENCRYPT, &conn->flags);
}
hci_dev_unlock(hdev);
@@ -4256,6 +4542,76 @@ not_found:
hci_dev_unlock(hdev);
}
+static void send_conn_param_neg_reply(struct hci_dev *hdev, u16 handle,
+ u8 reason)
+{
+ struct hci_cp_le_conn_param_req_neg_reply cp;
+
+ cp.handle = cpu_to_le16(handle);
+ cp.reason = reason;
+
+ hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_NEG_REPLY, sizeof(cp),
+ &cp);
+}
+
+static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_ev_le_remote_conn_param_req *ev = (void *) skb->data;
+ struct hci_cp_le_conn_param_req_reply cp;
+ struct hci_conn *hcon;
+ u16 handle, min, max, latency, timeout;
+
+ handle = le16_to_cpu(ev->handle);
+ min = le16_to_cpu(ev->interval_min);
+ max = le16_to_cpu(ev->interval_max);
+ latency = le16_to_cpu(ev->latency);
+ timeout = le16_to_cpu(ev->timeout);
+
+ hcon = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!hcon || hcon->state != BT_CONNECTED)
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_UNKNOWN_CONN_ID);
+
+ if (hci_check_conn_params(min, max, latency, timeout))
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+
+ if (hcon->role == HCI_ROLE_MASTER) {
+ struct hci_conn_params *params;
+ u8 store_hint;
+
+ hci_dev_lock(hdev);
+
+ params = hci_conn_params_lookup(hdev, &hcon->dst,
+ hcon->dst_type);
+ if (params) {
+ params->conn_min_interval = min;
+ params->conn_max_interval = max;
+ params->conn_latency = latency;
+ params->supervision_timeout = timeout;
+ store_hint = 0x01;
+ } else{
+ store_hint = 0x00;
+ }
+
+ hci_dev_unlock(hdev);
+
+ mgmt_new_conn_param(hdev, &hcon->dst, hcon->dst_type,
+ store_hint, min, max, latency, timeout);
+ }
+
+ cp.handle = ev->handle;
+ cp.interval_min = ev->interval_min;
+ cp.interval_max = ev->interval_max;
+ cp.latency = ev->latency;
+ cp.timeout = ev->timeout;
+ cp.min_ce_len = 0;
+ cp.max_ce_len = 0;
+
+ hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_REPLY, sizeof(cp), &cp);
+}
+
static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_le_meta *le_ev = (void *) skb->data;
@@ -4267,6 +4623,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_le_conn_complete_evt(hdev, skb);
break;
+ case HCI_EV_LE_CONN_UPDATE_COMPLETE:
+ hci_le_conn_update_complete_evt(hdev, skb);
+ break;
+
case HCI_EV_LE_ADVERTISING_REPORT:
hci_le_adv_report_evt(hdev, skb);
break;
@@ -4275,6 +4635,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_le_ltk_request_evt(hdev, skb);
break;
+ case HCI_EV_LE_REMOTE_CONN_PARAM_REQ:
+ hci_le_remote_conn_param_req_evt(hdev, skb);
+ break;
+
default:
break;
}
@@ -4306,7 +4670,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
/* Received events are (currently) only needed when a request is
* ongoing so avoid unnecessary memory allocation.
*/
- if (hdev->req_status == HCI_REQ_PEND) {
+ if (hci_req_pending(hdev)) {
kfree_skb(hdev->recv_evt);
hdev->recv_evt = skb_clone(skb, GFP_KERNEL);
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 80d25c150a65..115f149362ba 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -35,13 +35,32 @@ static atomic_t monitor_promisc = ATOMIC_INIT(0);
/* ----- HCI socket interface ----- */
+/* Socket info */
+#define hci_pi(sk) ((struct hci_pinfo *) sk)
+
+struct hci_pinfo {
+ struct bt_sock bt;
+ struct hci_dev *hdev;
+ struct hci_filter filter;
+ __u32 cmsg_mask;
+ unsigned short channel;
+};
+
static inline int hci_test_bit(int nr, void *addr)
{
return *((__u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
}
/* Security filter */
-static struct hci_sec_filter hci_sec_filter = {
+#define HCI_SFLT_MAX_OGF 5
+
+struct hci_sec_filter {
+ __u32 type_mask;
+ __u32 event_mask[2];
+ __u32 ocf_mask[HCI_SFLT_MAX_OGF + 1][4];
+};
+
+static const struct hci_sec_filter hci_sec_filter = {
/* Packet types */
0x10,
/* Events */
@@ -481,7 +500,7 @@ static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
hci_dev_lock(hdev);
- err = hci_blacklist_add(hdev, &bdaddr, BDADDR_BREDR);
+ err = hci_bdaddr_list_add(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
hci_dev_unlock(hdev);
@@ -498,7 +517,7 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
hci_dev_lock(hdev);
- err = hci_blacklist_del(hdev, &bdaddr, BDADDR_BREDR);
+ err = hci_bdaddr_list_del(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
hci_dev_unlock(hdev);
@@ -517,6 +536,9 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags))
return -EBUSY;
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+ return -EOPNOTSUPP;
+
if (hdev->dev_type != HCI_BREDR)
return -EOPNOTSUPP;
@@ -690,7 +712,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
if (test_bit(HCI_UP, &hdev->flags) ||
test_bit(HCI_INIT, &hdev->flags) ||
- test_bit(HCI_SETUP, &hdev->dev_flags)) {
+ test_bit(HCI_SETUP, &hdev->dev_flags) ||
+ test_bit(HCI_CONFIG, &hdev->dev_flags)) {
err = -EBUSY;
hci_dev_put(hdev);
goto done;
@@ -960,7 +983,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
goto drop;
}
- if (test_bit(HCI_RAW, &hdev->flags) || (ogf == 0x3f)) {
+ if (ogf == 0x3f) {
skb_queue_tail(&hdev->raw_q, skb);
queue_work(hdev->workqueue, &hdev->tx_work);
} else {
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 8181ea4bc2f2..1b7d605706aa 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -154,7 +154,7 @@ static int hidp_input_event(struct input_dev *dev, unsigned int type,
(!!test_bit(LED_COMPOSE, dev->led) << 3) |
(!!test_bit(LED_SCROLLL, dev->led) << 2) |
(!!test_bit(LED_CAPSL, dev->led) << 1) |
- (!!test_bit(LED_NUML, dev->led));
+ (!!test_bit(LED_NUML, dev->led) << 0);
if (session->leds == newleds)
return 0;
@@ -915,7 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
/* connection management */
bacpy(&session->bdaddr, bdaddr);
- session->conn = conn;
+ session->conn = l2cap_conn_get(conn);
session->user.probe = hidp_session_probe;
session->user.remove = hidp_session_remove;
session->ctrl_sock = ctrl_sock;
@@ -941,13 +941,13 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
if (ret)
goto err_free;
- l2cap_conn_get(session->conn);
get_file(session->intr_sock->file);
get_file(session->ctrl_sock->file);
*out = session;
return 0;
err_free:
+ l2cap_conn_put(session->conn);
kfree(session);
return ret;
}
@@ -1327,10 +1327,8 @@ int hidp_connection_add(struct hidp_connadd_req *req,
conn = NULL;
l2cap_chan_lock(chan);
- if (chan->conn) {
- l2cap_conn_get(chan->conn);
- conn = chan->conn;
- }
+ if (chan->conn)
+ conn = l2cap_conn_get(chan->conn);
l2cap_chan_unlock(chan);
if (!conn)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 323f23cd2c37..b6f9777e057d 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -40,14 +40,13 @@
#include "smp.h"
#include "a2mp.h"
#include "amp.h"
-#include "6lowpan.h"
#define LE_FLOWCTL_MAX_CREDITS 65535
bool disable_ertm;
static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD;
-static u8 l2cap_fixed_chan[8] = { L2CAP_FC_L2CAP | L2CAP_FC_CONNLESS, };
+static u8 l2cap_fixed_chan[8] = { L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS, };
static LIST_HEAD(chan_list);
static DEFINE_RWLOCK(chan_list_lock);
@@ -205,11 +204,16 @@ done:
write_unlock(&chan_list_lock);
return err;
}
+EXPORT_SYMBOL_GPL(l2cap_add_psm);
int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid)
{
write_lock(&chan_list_lock);
+ /* Override the defaults (which are for conn-oriented) */
+ chan->omtu = L2CAP_DEFAULT_MTU;
+ chan->chan_type = L2CAP_CHAN_FIXED;
+
chan->scid = scid;
write_unlock(&chan_list_lock);
@@ -437,6 +441,7 @@ struct l2cap_chan *l2cap_chan_create(void)
return chan;
}
+EXPORT_SYMBOL_GPL(l2cap_chan_create);
static void l2cap_chan_destroy(struct kref *kref)
{
@@ -464,6 +469,7 @@ void l2cap_chan_put(struct l2cap_chan *c)
kref_put(&c->kref, l2cap_chan_destroy);
}
+EXPORT_SYMBOL_GPL(l2cap_chan_put);
void l2cap_chan_set_defaults(struct l2cap_chan *chan)
{
@@ -482,6 +488,7 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
}
+EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults);
static void l2cap_le_flowctl_init(struct l2cap_chan *chan)
{
@@ -539,7 +546,10 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
l2cap_chan_hold(chan);
- hci_conn_hold(conn->hcon);
+ /* Only keep a reference for fixed channels if they requested it */
+ if (chan->chan_type != L2CAP_CHAN_FIXED ||
+ test_bit(FLAG_HOLD_HCI_CONN, &chan->flags))
+ hci_conn_hold(conn->hcon);
list_add(&chan->list, &conn->chan_l);
}
@@ -559,6 +569,8 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
BT_DBG("chan %p, conn %p, err %d", chan, conn, err);
+ chan->ops->teardown(chan, err);
+
if (conn) {
struct amp_mgr *mgr = conn->hcon->amp_mgr;
/* Delete from channel list */
@@ -568,7 +580,12 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
chan->conn = NULL;
- if (chan->scid != L2CAP_CID_A2MP)
+ /* Reference was only held for non-fixed channels or
+ * fixed channels that explicitly requested it using the
+ * FLAG_HOLD_HCI_CONN flag.
+ */
+ if (chan->chan_type != L2CAP_CHAN_FIXED ||
+ test_bit(FLAG_HOLD_HCI_CONN, &chan->flags))
hci_conn_drop(conn->hcon);
if (mgr && mgr->bredr_chan == chan)
@@ -582,8 +599,6 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
amp_disconnect_logical_link(hs_hchan);
}
- chan->ops->teardown(chan, err);
-
if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state))
return;
@@ -614,10 +629,13 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
return;
}
+EXPORT_SYMBOL_GPL(l2cap_chan_del);
-void l2cap_conn_update_id_addr(struct hci_conn *hcon)
+static void l2cap_conn_update_id_addr(struct work_struct *work)
{
- struct l2cap_conn *conn = hcon->l2cap_data;
+ struct l2cap_conn *conn = container_of(work, struct l2cap_conn,
+ id_addr_update_work);
+ struct hci_conn *hcon = conn->hcon;
struct l2cap_chan *chan;
mutex_lock(&conn->chan_lock);
@@ -717,6 +735,7 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason)
break;
}
}
+EXPORT_SYMBOL(l2cap_chan_close);
static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan)
{
@@ -770,7 +789,7 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan)
}
/* Service level security */
-int l2cap_chan_check_security(struct l2cap_chan *chan)
+int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator)
{
struct l2cap_conn *conn = chan->conn;
__u8 auth_type;
@@ -780,7 +799,8 @@ int l2cap_chan_check_security(struct l2cap_chan *chan)
auth_type = l2cap_get_auth_type(chan);
- return hci_conn_security(conn->hcon, chan->sec_level, auth_type);
+ return hci_conn_security(conn->hcon, chan->sec_level, auth_type,
+ initiator);
}
static u8 l2cap_get_ident(struct l2cap_conn *conn)
@@ -793,14 +813,14 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn)
* 200 - 254 are used by utilities like l2ping, etc.
*/
- spin_lock(&conn->lock);
+ mutex_lock(&conn->ident_lock);
if (++conn->tx_ident > 128)
conn->tx_ident = 1;
id = conn->tx_ident;
- spin_unlock(&conn->lock);
+ mutex_unlock(&conn->ident_lock);
return id;
}
@@ -1076,6 +1096,9 @@ static void l2cap_send_rr_or_rnr(struct l2cap_chan *chan, bool poll)
static inline int __l2cap_no_conn_pending(struct l2cap_chan *chan)
{
+ if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED)
+ return true;
+
return !test_bit(CONF_CONNECT_PEND, &chan->conf_state);
}
@@ -1260,6 +1283,24 @@ static void l2cap_start_connection(struct l2cap_chan *chan)
}
}
+static void l2cap_request_info(struct l2cap_conn *conn)
+{
+ struct l2cap_info_req req;
+
+ if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
+ return;
+
+ req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
+
+ conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
+ conn->info_ident = l2cap_get_ident(conn);
+
+ schedule_delayed_work(&conn->info_timer, L2CAP_INFO_TIMEOUT);
+
+ l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ,
+ sizeof(req), &req);
+}
+
static void l2cap_do_start(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
@@ -1269,26 +1310,17 @@ static void l2cap_do_start(struct l2cap_chan *chan)
return;
}
- if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
- if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
- return;
-
- if (l2cap_chan_check_security(chan) &&
- __l2cap_no_conn_pending(chan)) {
- l2cap_start_connection(chan);
- }
- } else {
- struct l2cap_info_req req;
- req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
-
- conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
- conn->info_ident = l2cap_get_ident(conn);
+ if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) {
+ l2cap_request_info(conn);
+ return;
+ }
- schedule_delayed_work(&conn->info_timer, L2CAP_INFO_TIMEOUT);
+ if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
+ return;
- l2cap_send_cmd(conn, conn->info_ident, L2CAP_INFO_REQ,
- sizeof(req), &req);
- }
+ if (l2cap_chan_check_security(chan, true) &&
+ __l2cap_no_conn_pending(chan))
+ l2cap_start_connection(chan);
}
static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
@@ -1347,12 +1379,13 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
l2cap_chan_lock(chan);
if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
+ l2cap_chan_ready(chan);
l2cap_chan_unlock(chan);
continue;
}
if (chan->state == BT_CONNECT) {
- if (!l2cap_chan_check_security(chan) ||
+ if (!l2cap_chan_check_security(chan, true) ||
!__l2cap_no_conn_pending(chan)) {
l2cap_chan_unlock(chan);
continue;
@@ -1374,7 +1407,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
rsp.scid = cpu_to_le16(chan->dcid);
rsp.dcid = cpu_to_le16(chan->scid);
- if (l2cap_chan_check_security(chan)) {
+ if (l2cap_chan_check_security(chan, false)) {
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
rsp.result = cpu_to_le16(L2CAP_CR_PEND);
rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
@@ -1411,88 +1444,37 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
mutex_unlock(&conn->chan_lock);
}
-/* Find socket with cid and source/destination bdaddr.
- * Returns closest match, locked.
- */
-static struct l2cap_chan *l2cap_global_chan_by_scid(int state, u16 cid,
- bdaddr_t *src,
- bdaddr_t *dst)
-{
- struct l2cap_chan *c, *c1 = NULL;
-
- read_lock(&chan_list_lock);
-
- list_for_each_entry(c, &chan_list, global_l) {
- if (state && c->state != state)
- continue;
-
- if (c->scid == cid) {
- int src_match, dst_match;
- int src_any, dst_any;
-
- /* Exact match. */
- src_match = !bacmp(&c->src, src);
- dst_match = !bacmp(&c->dst, dst);
- if (src_match && dst_match) {
- read_unlock(&chan_list_lock);
- return c;
- }
-
- /* Closest match */
- src_any = !bacmp(&c->src, BDADDR_ANY);
- dst_any = !bacmp(&c->dst, BDADDR_ANY);
- if ((src_match && dst_any) || (src_any && dst_match) ||
- (src_any && dst_any))
- c1 = c;
- }
- }
-
- read_unlock(&chan_list_lock);
-
- return c1;
-}
-
static void l2cap_le_conn_ready(struct l2cap_conn *conn)
{
struct hci_conn *hcon = conn->hcon;
- struct l2cap_chan *chan, *pchan;
- u8 dst_type;
-
- BT_DBG("");
-
- bt_6lowpan_add_conn(conn);
-
- /* Check if we have socket listening on cid */
- pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT,
- &hcon->src, &hcon->dst);
- if (!pchan)
- return;
-
- /* Client ATT sockets should override the server one */
- if (__l2cap_get_chan_by_dcid(conn, L2CAP_CID_ATT))
- return;
-
- dst_type = bdaddr_type(hcon, hcon->dst_type);
-
- /* If device is blocked, do not create a channel for it */
- if (hci_blacklist_lookup(hcon->hdev, &hcon->dst, dst_type))
- return;
+ struct hci_dev *hdev = hcon->hdev;
- l2cap_chan_lock(pchan);
+ BT_DBG("%s conn %p", hdev->name, conn);
- chan = pchan->ops->new_connection(pchan);
- if (!chan)
- goto clean;
+ /* For outgoing pairing which doesn't necessarily have an
+ * associated socket (e.g. mgmt_pair_device).
+ */
+ if (hcon->out)
+ smp_conn_security(hcon, hcon->pending_sec_level);
- bacpy(&chan->src, &hcon->src);
- bacpy(&chan->dst, &hcon->dst);
- chan->src_type = bdaddr_type(hcon, hcon->src_type);
- chan->dst_type = dst_type;
+ /* For LE slave connections, make sure the connection interval
+ * is in the range of the minium and maximum interval that has
+ * been configured for this connection. If not, then trigger
+ * the connection update procedure.
+ */
+ if (hcon->role == HCI_ROLE_SLAVE &&
+ (hcon->le_conn_interval < hcon->le_conn_min_interval ||
+ hcon->le_conn_interval > hcon->le_conn_max_interval)) {
+ struct l2cap_conn_param_update_req req;
- __l2cap_chan_add(conn, chan);
+ req.min = cpu_to_le16(hcon->le_conn_min_interval);
+ req.max = cpu_to_le16(hcon->le_conn_max_interval);
+ req.latency = cpu_to_le16(hcon->le_conn_latency);
+ req.to_multiplier = cpu_to_le16(hcon->le_supv_timeout);
-clean:
- l2cap_chan_unlock(pchan);
+ l2cap_send_cmd(conn, l2cap_get_ident(conn),
+ L2CAP_CONN_PARAM_UPDATE_REQ, sizeof(req), &req);
+ }
}
static void l2cap_conn_ready(struct l2cap_conn *conn)
@@ -1502,17 +1484,11 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
BT_DBG("conn %p", conn);
- /* For outgoing pairing which doesn't necessarily have an
- * associated socket (e.g. mgmt_pair_device).
- */
- if (hcon->out && hcon->type == LE_LINK)
- smp_conn_security(hcon, hcon->pending_sec_level);
+ if (hcon->type == ACL_LINK)
+ l2cap_request_info(conn);
mutex_lock(&conn->chan_lock);
- if (hcon->type == LE_LINK)
- l2cap_le_conn_ready(conn);
-
list_for_each_entry(chan, &conn->chan_l, list) {
l2cap_chan_lock(chan);
@@ -1525,8 +1501,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
if (hcon->type == LE_LINK) {
l2cap_le_start(chan);
} else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
- l2cap_chan_ready(chan);
-
+ if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)
+ l2cap_chan_ready(chan);
} else if (chan->state == BT_CONNECT) {
l2cap_do_start(chan);
}
@@ -1536,6 +1512,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
mutex_unlock(&conn->chan_lock);
+ if (hcon->type == LE_LINK)
+ l2cap_le_conn_ready(conn);
+
queue_work(hcon->hdev->workqueue, &conn->pending_rx_work);
}
@@ -1671,8 +1650,14 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
if (work_pending(&conn->pending_rx_work))
cancel_work_sync(&conn->pending_rx_work);
+ if (work_pending(&conn->id_addr_update_work))
+ cancel_work_sync(&conn->id_addr_update_work);
+
l2cap_unregister_all_users(conn);
+ /* Force the connection to be immediately dropped */
+ hcon->disc_timeout = 0;
+
mutex_lock(&conn->chan_lock);
/* Kill channels */
@@ -1695,29 +1680,11 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
cancel_delayed_work_sync(&conn->info_timer);
- if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags)) {
- cancel_delayed_work_sync(&conn->security_timer);
- smp_chan_destroy(conn);
- }
-
hcon->l2cap_data = NULL;
conn->hchan = NULL;
l2cap_conn_put(conn);
}
-static void security_timeout(struct work_struct *work)
-{
- struct l2cap_conn *conn = container_of(work, struct l2cap_conn,
- security_timer.work);
-
- BT_DBG("conn %p", conn);
-
- if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) {
- smp_chan_destroy(conn);
- l2cap_conn_del(conn->hcon, ETIMEDOUT);
- }
-}
-
static void l2cap_conn_free(struct kref *ref)
{
struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref);
@@ -1726,9 +1693,10 @@ static void l2cap_conn_free(struct kref *ref)
kfree(conn);
}
-void l2cap_conn_get(struct l2cap_conn *conn)
+struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn)
{
kref_get(&conn->ref);
+ return conn;
}
EXPORT_SYMBOL(l2cap_conn_get);
@@ -1770,6 +1738,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
src_match = !bacmp(&c->src, src);
dst_match = !bacmp(&c->dst, dst);
if (src_match && dst_match) {
+ l2cap_chan_hold(c);
read_unlock(&chan_list_lock);
return c;
}
@@ -1783,6 +1752,9 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
}
}
+ if (c1)
+ l2cap_chan_hold(c1);
+
read_unlock(&chan_list_lock);
return c1;
@@ -2003,10 +1975,12 @@ static void l2cap_ertm_resend(struct l2cap_chan *chan)
tx_skb->data + L2CAP_HDR_SIZE);
}
+ /* Update FCS */
if (chan->fcs == L2CAP_FCS_CRC16) {
- u16 fcs = crc16(0, (u8 *) tx_skb->data, tx_skb->len);
- put_unaligned_le16(fcs, skb_put(tx_skb,
- L2CAP_FCS_SIZE));
+ u16 fcs = crc16(0, (u8 *) tx_skb->data,
+ tx_skb->len - L2CAP_FCS_SIZE);
+ put_unaligned_le16(fcs, skb_tail_pointer(tx_skb) -
+ L2CAP_FCS_SIZE);
}
l2cap_do_send(chan, tx_skb);
@@ -2118,7 +2092,8 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
struct sk_buff **frag;
int sent = 0;
- if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count))
+ if (chan->ops->memcpy_fromiovec(chan, skb_put(skb, count),
+ msg->msg_iov, count))
return -EFAULT;
sent += count;
@@ -2131,18 +2106,17 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
count = min_t(unsigned int, conn->mtu, len);
- tmp = chan->ops->alloc_skb(chan, count,
+ tmp = chan->ops->alloc_skb(chan, 0, count,
msg->msg_flags & MSG_DONTWAIT);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
*frag = tmp;
- if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count))
+ if (chan->ops->memcpy_fromiovec(chan, skb_put(*frag, count),
+ msg->msg_iov, count))
return -EFAULT;
- (*frag)->priority = skb->priority;
-
sent += count;
len -= count;
@@ -2156,26 +2130,23 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
}
static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan,
- struct msghdr *msg, size_t len,
- u32 priority)
+ struct msghdr *msg, size_t len)
{
struct l2cap_conn *conn = chan->conn;
struct sk_buff *skb;
int err, count, hlen = L2CAP_HDR_SIZE + L2CAP_PSMLEN_SIZE;
struct l2cap_hdr *lh;
- BT_DBG("chan %p psm 0x%2.2x len %zu priority %u", chan,
- __le16_to_cpu(chan->psm), len, priority);
+ BT_DBG("chan %p psm 0x%2.2x len %zu", chan,
+ __le16_to_cpu(chan->psm), len);
count = min_t(unsigned int, (conn->mtu - hlen), len);
- skb = chan->ops->alloc_skb(chan, count + hlen,
+ skb = chan->ops->alloc_skb(chan, hlen, count,
msg->msg_flags & MSG_DONTWAIT);
if (IS_ERR(skb))
return skb;
- skb->priority = priority;
-
/* Create L2CAP header */
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
lh->cid = cpu_to_le16(chan->dcid);
@@ -2191,8 +2162,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan,
}
static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan,
- struct msghdr *msg, size_t len,
- u32 priority)
+ struct msghdr *msg, size_t len)
{
struct l2cap_conn *conn = chan->conn;
struct sk_buff *skb;
@@ -2203,13 +2173,11 @@ static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan,
count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len);
- skb = chan->ops->alloc_skb(chan, count + L2CAP_HDR_SIZE,
+ skb = chan->ops->alloc_skb(chan, L2CAP_HDR_SIZE, count,
msg->msg_flags & MSG_DONTWAIT);
if (IS_ERR(skb))
return skb;
- skb->priority = priority;
-
/* Create L2CAP header */
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
lh->cid = cpu_to_le16(chan->dcid);
@@ -2247,7 +2215,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan,
count = min_t(unsigned int, (conn->mtu - hlen), len);
- skb = chan->ops->alloc_skb(chan, count + hlen,
+ skb = chan->ops->alloc_skb(chan, hlen, count,
msg->msg_flags & MSG_DONTWAIT);
if (IS_ERR(skb))
return skb;
@@ -2316,7 +2284,6 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan,
} else {
sar = L2CAP_SAR_START;
sdu_len = len;
- pdu_len -= L2CAP_SDULEN_SIZE;
}
while (len > 0) {
@@ -2331,10 +2298,8 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan,
__skb_queue_tail(seg_queue, skb);
len -= pdu_len;
- if (sdu_len) {
+ if (sdu_len)
sdu_len = 0;
- pdu_len += L2CAP_SDULEN_SIZE;
- }
if (len <= pdu_len) {
sar = L2CAP_SAR_END;
@@ -2368,7 +2333,7 @@ static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan,
count = min_t(unsigned int, (conn->mtu - hlen), len);
- skb = chan->ops->alloc_skb(chan, count + hlen,
+ skb = chan->ops->alloc_skb(chan, hlen, count,
msg->msg_flags & MSG_DONTWAIT);
if (IS_ERR(skb))
return skb;
@@ -2400,12 +2365,8 @@ static int l2cap_segment_le_sdu(struct l2cap_chan *chan,
BT_DBG("chan %p, msg %p, len %zu", chan, msg, len);
- pdu_len = chan->conn->mtu - L2CAP_HDR_SIZE;
-
- pdu_len = min_t(size_t, pdu_len, chan->remote_mps);
-
sdu_len = len;
- pdu_len -= L2CAP_SDULEN_SIZE;
+ pdu_len = chan->remote_mps - L2CAP_SDULEN_SIZE;
while (len > 0) {
if (len <= pdu_len)
@@ -2430,8 +2391,7 @@ static int l2cap_segment_le_sdu(struct l2cap_chan *chan,
return 0;
}
-int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
- u32 priority)
+int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
{
struct sk_buff *skb;
int err;
@@ -2442,7 +2402,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
/* Connectionless channel */
if (chan->chan_type == L2CAP_CHAN_CONN_LESS) {
- skb = l2cap_create_connless_pdu(chan, msg, len, priority);
+ skb = l2cap_create_connless_pdu(chan, msg, len);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -2499,7 +2459,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
return -EMSGSIZE;
/* Create a basic PDU */
- skb = l2cap_create_basic_pdu(chan, msg, len, priority);
+ skb = l2cap_create_basic_pdu(chan, msg, len);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -2562,6 +2522,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
return err;
}
+EXPORT_SYMBOL_GPL(l2cap_chan_send);
static void l2cap_send_srej(struct l2cap_chan *chan, u16 txseq)
{
@@ -3217,6 +3178,9 @@ done:
switch (chan->mode) {
case L2CAP_MODE_BASIC:
+ if (disable_ertm)
+ break;
+
if (!(chan->conn->feat_mask & L2CAP_FEAT_ERTM) &&
!(chan->conn->feat_mask & L2CAP_FEAT_STREAMING))
break;
@@ -3829,7 +3793,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
chan->ident = cmd->ident;
if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) {
- if (l2cap_chan_check_security(chan)) {
+ if (l2cap_chan_check_security(chan, false)) {
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
l2cap_state_change(chan, BT_CONNECT2);
result = L2CAP_CR_PEND;
@@ -3863,6 +3827,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
response:
l2cap_chan_unlock(pchan);
mutex_unlock(&conn->chan_lock);
+ l2cap_chan_put(pchan);
sendresp:
rsp.scid = cpu_to_le16(scid);
@@ -5197,27 +5162,6 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
return 0;
}
-static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency,
- u16 to_multiplier)
-{
- u16 max_latency;
-
- if (min > max || min < 6 || max > 3200)
- return -EINVAL;
-
- if (to_multiplier < 10 || to_multiplier > 3200)
- return -EINVAL;
-
- if (max >= to_multiplier * 8)
- return -EINVAL;
-
- max_latency = (to_multiplier * 8 / max) - 1;
- if (latency > 499 || latency > max_latency)
- return -EINVAL;
-
- return 0;
-}
-
static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
struct l2cap_cmd_hdr *cmd,
u16 cmd_len, u8 *data)
@@ -5228,7 +5172,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
u16 min, max, latency, to_multiplier;
int err;
- if (!(hcon->link_mode & HCI_LM_MASTER))
+ if (hcon->role != HCI_ROLE_MASTER)
return -EINVAL;
if (cmd_len != sizeof(struct l2cap_conn_param_update_req))
@@ -5245,7 +5189,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- err = l2cap_check_conn_param(min, max, latency, to_multiplier);
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
@@ -5254,8 +5198,16 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP,
sizeof(rsp), &rsp);
- if (!err)
- hci_le_conn_update(hcon, min, max, latency, to_multiplier);
+ if (!err) {
+ u8 store_hint;
+
+ store_hint = hci_le_conn_update(hcon, min, max, latency,
+ to_multiplier);
+ mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type,
+ store_hint, min, max, latency,
+ to_multiplier);
+
+ }
return 0;
}
@@ -5479,6 +5431,11 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
l2cap_state_change(chan, BT_CONNECT2);
+ /* The following result value is actually not defined
+ * for LE CoC but we use it to let the function know
+ * that it should bail out after doing its cleanup
+ * instead of sending a response.
+ */
result = L2CAP_CR_PEND;
chan->ops->defer(chan);
} else {
@@ -5489,6 +5446,7 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
response_unlock:
l2cap_chan_unlock(pchan);
mutex_unlock(&conn->chan_lock);
+ l2cap_chan_put(pchan);
if (result == L2CAP_CR_PEND)
return 0;
@@ -6837,12 +6795,12 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
struct l2cap_chan *chan;
if (hcon->type != ACL_LINK)
- goto drop;
+ goto free_skb;
chan = l2cap_global_chan_by_psm(0, psm, &hcon->src, &hcon->dst,
ACL_LINK);
if (!chan)
- goto drop;
+ goto free_skb;
BT_DBG("chan %p, len %d", chan, skb->len);
@@ -6856,39 +6814,14 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
bacpy(&bt_cb(skb)->bdaddr, &hcon->dst);
bt_cb(skb)->psm = psm;
- if (!chan->ops->recv(chan, skb))
- return;
-
-drop:
- kfree_skb(skb);
-}
-
-static void l2cap_att_channel(struct l2cap_conn *conn,
- struct sk_buff *skb)
-{
- struct hci_conn *hcon = conn->hcon;
- struct l2cap_chan *chan;
-
- if (hcon->type != LE_LINK)
- goto drop;
-
- chan = l2cap_global_chan_by_scid(BT_CONNECTED, L2CAP_CID_ATT,
- &hcon->src, &hcon->dst);
- if (!chan)
- goto drop;
-
- BT_DBG("chan %p, len %d", chan, skb->len);
-
- if (hci_blacklist_lookup(hcon->hdev, &hcon->dst, hcon->dst_type))
- goto drop;
-
- if (chan->imtu < skb->len)
- goto drop;
-
- if (!chan->ops->recv(chan, skb))
+ if (!chan->ops->recv(chan, skb)) {
+ l2cap_chan_put(chan);
return;
+ }
drop:
+ l2cap_chan_put(chan);
+free_skb:
kfree_skb(skb);
}
@@ -6914,6 +6847,16 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
return;
}
+ /* Since we can't actively block incoming LE connections we must
+ * at least ensure that we ignore incoming data from them.
+ */
+ if (hcon->type == LE_LINK &&
+ hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst,
+ bdaddr_type(hcon, hcon->dst_type))) {
+ kfree_skb(skb);
+ return;
+ }
+
BT_DBG("len %d, cid 0x%4.4x", len, cid);
switch (cid) {
@@ -6927,23 +6870,10 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
l2cap_conless_channel(conn, psm, skb);
break;
- case L2CAP_CID_ATT:
- l2cap_att_channel(conn, skb);
- break;
-
case L2CAP_CID_LE_SIGNALING:
l2cap_le_sig_channel(conn, skb);
break;
- case L2CAP_CID_SMP:
- if (smp_sig_channel(conn, skb))
- l2cap_conn_del(conn->hcon, EACCES);
- break;
-
- case L2CAP_FC_6LOWPAN:
- bt_6lowpan_recv(conn, skb);
- break;
-
default:
l2cap_data_channel(conn, cid, skb);
break;
@@ -6974,7 +6904,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
if (!hchan)
return NULL;
- conn = kzalloc(sizeof(struct l2cap_conn), GFP_KERNEL);
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn) {
hci_chan_del(hchan);
return NULL;
@@ -6982,8 +6912,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
kref_init(&conn->ref);
hcon->l2cap_data = conn;
- conn->hcon = hcon;
- hci_conn_get(conn->hcon);
+ conn->hcon = hci_conn_get(hcon);
conn->hchan = hchan;
BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
@@ -7006,19 +6935,17 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
conn->hs_enabled = test_bit(HCI_HS_ENABLED,
&hcon->hdev->dev_flags);
- spin_lock_init(&conn->lock);
+ mutex_init(&conn->ident_lock);
mutex_init(&conn->chan_lock);
INIT_LIST_HEAD(&conn->chan_l);
INIT_LIST_HEAD(&conn->users);
- if (hcon->type == LE_LINK)
- INIT_DELAYED_WORK(&conn->security_timer, security_timeout);
- else
- INIT_DELAYED_WORK(&conn->info_timer, l2cap_info_timeout);
+ INIT_DELAYED_WORK(&conn->info_timer, l2cap_info_timeout);
skb_queue_head_init(&conn->pending_rx);
INIT_WORK(&conn->pending_rx_work, process_pending_rx);
+ INIT_WORK(&conn->id_addr_update_work, l2cap_conn_update_id_addr);
conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM;
@@ -7042,7 +6969,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
struct l2cap_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
- __u8 auth_type;
int err;
BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst,
@@ -7054,8 +6980,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
hci_dev_lock(hdev);
- l2cap_chan_lock(chan);
-
if (!is_valid_psm(__le16_to_cpu(psm), dst_type) && !cid &&
chan->chan_type != L2CAP_CHAN_RAW) {
err = -EINVAL;
@@ -7084,7 +7008,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
break;
/* fall through */
default:
- err = -ENOTSUPP;
+ err = -EOPNOTSUPP;
goto done;
}
@@ -7118,9 +7042,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
chan->psm = psm;
chan->dcid = cid;
- auth_type = l2cap_get_auth_type(chan);
-
if (bdaddr_type_is_le(dst_type)) {
+ u8 role;
+
/* Convert from L2CAP channel address type to HCI address type
*/
if (dst_type == BDADDR_LE_PUBLIC)
@@ -7128,9 +7052,15 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
else
dst_type = ADDR_LE_DEV_RANDOM;
+ if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+ role = HCI_ROLE_SLAVE;
+ else
+ role = HCI_ROLE_MASTER;
+
hcon = hci_connect_le(hdev, dst, dst_type, chan->sec_level,
- auth_type);
+ HCI_LE_CONN_TIMEOUT, role);
} else {
+ u8 auth_type = l2cap_get_auth_type(chan);
hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type);
}
@@ -7146,19 +7076,20 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
goto done;
}
+ mutex_lock(&conn->chan_lock);
+ l2cap_chan_lock(chan);
+
if (cid && __l2cap_get_chan_by_dcid(conn, cid)) {
hci_conn_drop(hcon);
err = -EBUSY;
- goto done;
+ goto chan_unlock;
}
/* Update source addr of the socket */
bacpy(&chan->src, &hcon->src);
chan->src_type = bdaddr_type(hcon, hcon->src_type);
- l2cap_chan_unlock(chan);
- l2cap_chan_add(conn, chan);
- l2cap_chan_lock(chan);
+ __l2cap_chan_add(conn, chan);
/* l2cap_chan_add takes its own ref so we can drop this one */
hci_conn_drop(hcon);
@@ -7176,7 +7107,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
if (hcon->state == BT_CONNECTED) {
if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
__clear_chan_timer(chan);
- if (l2cap_chan_check_security(chan))
+ if (l2cap_chan_check_security(chan, true))
l2cap_state_change(chan, BT_CONNECTED);
} else
l2cap_do_start(chan);
@@ -7184,12 +7115,15 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
err = 0;
-done:
+chan_unlock:
l2cap_chan_unlock(chan);
+ mutex_unlock(&conn->chan_lock);
+done:
hci_dev_unlock(hdev);
hci_dev_put(hdev);
return err;
}
+EXPORT_SYMBOL_GPL(l2cap_chan_connect);
/* ---- L2CAP interface with lower layer (HCI) ---- */
@@ -7222,19 +7156,99 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
return exact ? lm1 : lm2;
}
+/* Find the next fixed channel in BT_LISTEN state, continue iteration
+ * from an existing channel in the list or from the beginning of the
+ * global list (by passing NULL as first parameter).
+ */
+static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
+ bdaddr_t *src, u8 link_type)
+{
+ read_lock(&chan_list_lock);
+
+ if (c)
+ c = list_next_entry(c, global_l);
+ else
+ c = list_entry(chan_list.next, typeof(*c), global_l);
+
+ list_for_each_entry_from(c, &chan_list, global_l) {
+ if (c->chan_type != L2CAP_CHAN_FIXED)
+ continue;
+ if (c->state != BT_LISTEN)
+ continue;
+ if (bacmp(&c->src, src) && bacmp(&c->src, BDADDR_ANY))
+ continue;
+ if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR)
+ continue;
+ if (link_type == LE_LINK && c->src_type == BDADDR_BREDR)
+ continue;
+
+ l2cap_chan_hold(c);
+ read_unlock(&chan_list_lock);
+ return c;
+ }
+
+ read_unlock(&chan_list_lock);
+
+ return NULL;
+}
+
void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
+ struct hci_dev *hdev = hcon->hdev;
struct l2cap_conn *conn;
+ struct l2cap_chan *pchan;
+ u8 dst_type;
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
- if (!status) {
- conn = l2cap_conn_add(hcon);
- if (conn)
- l2cap_conn_ready(conn);
- } else {
+ if (status) {
l2cap_conn_del(hcon, bt_to_errno(status));
+ return;
}
+
+ conn = l2cap_conn_add(hcon);
+ if (!conn)
+ return;
+
+ dst_type = bdaddr_type(hcon, hcon->dst_type);
+
+ /* If device is blocked, do not create channels for it */
+ if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type))
+ return;
+
+ /* Find fixed channels and notify them of the new connection. We
+ * use multiple individual lookups, continuing each time where
+ * we left off, because the list lock would prevent calling the
+ * potentially sleeping l2cap_chan_lock() function.
+ */
+ pchan = l2cap_global_fixed_chan(NULL, &hdev->bdaddr, hcon->type);
+ while (pchan) {
+ struct l2cap_chan *chan, *next;
+
+ /* Client fixed channels should override server ones */
+ if (__l2cap_get_chan_by_dcid(conn, pchan->scid))
+ goto next;
+
+ l2cap_chan_lock(pchan);
+ chan = pchan->ops->new_connection(pchan);
+ if (chan) {
+ bacpy(&chan->src, &hcon->src);
+ bacpy(&chan->dst, &hcon->dst);
+ chan->src_type = bdaddr_type(hcon, hcon->src_type);
+ chan->dst_type = dst_type;
+
+ __l2cap_chan_add(conn, chan);
+ }
+
+ l2cap_chan_unlock(pchan);
+next:
+ next = l2cap_global_fixed_chan(pchan, &hdev->bdaddr,
+ hcon->type);
+ l2cap_chan_put(pchan);
+ pchan = next;
+ }
+
+ l2cap_conn_ready(conn);
}
int l2cap_disconn_ind(struct hci_conn *hcon)
@@ -7252,8 +7266,6 @@ void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
{
BT_DBG("hcon %p reason %d", hcon, reason);
- bt_6lowpan_del_conn(hcon->l2cap_data);
-
l2cap_conn_del(hcon, bt_to_errno(reason));
}
@@ -7284,12 +7296,6 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt);
- if (hcon->type == LE_LINK) {
- if (!status && encrypt)
- smp_distribute_keys(conn);
- cancel_delayed_work(&conn->security_timer);
- }
-
mutex_lock(&conn->chan_lock);
list_for_each_entry(chan, &conn->chan_l, list) {
@@ -7303,15 +7309,8 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
continue;
}
- if (chan->scid == L2CAP_CID_ATT) {
- if (!status && encrypt) {
- chan->sec_level = hcon->sec_level;
- l2cap_chan_ready(chan);
- }
-
- l2cap_chan_unlock(chan);
- continue;
- }
+ if (!status && encrypt)
+ chan->sec_level = hcon->sec_level;
if (!__l2cap_no_conn_pending(chan)) {
l2cap_chan_unlock(chan);
@@ -7536,14 +7535,11 @@ int __init l2cap_init(void)
debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs,
&le_default_mps);
- bt_6lowpan_init();
-
return 0;
}
void l2cap_exit(void)
{
- bt_6lowpan_cleanup();
debugfs_remove(l2cap_debugfs);
l2cap_cleanup_sockets();
}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index e1378693cc90..31f106e61ca2 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -99,15 +99,6 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
if (!bdaddr_type_is_valid(la.l2_bdaddr_type))
return -EINVAL;
- if (la.l2_cid) {
- /* When the socket gets created it defaults to
- * CHAN_CONN_ORIENTED, so we need to overwrite the
- * default here.
- */
- chan->chan_type = L2CAP_CHAN_FIXED;
- chan->omtu = L2CAP_DEFAULT_MTU;
- }
-
if (bdaddr_type_is_le(la.l2_bdaddr_type)) {
/* We only allow ATT user space socket */
if (la.l2_cid &&
@@ -155,6 +146,14 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
case L2CAP_CHAN_RAW:
chan->sec_level = BT_SECURITY_SDP;
break;
+ case L2CAP_CHAN_FIXED:
+ /* Fixed channels default to the L2CAP core not holding a
+ * hci_conn reference for them. For fixed channels mapping to
+ * L2CAP sockets we do want to hold a reference so set the
+ * appropriate flag to request it.
+ */
+ set_bit(FLAG_HOLD_HCI_CONN, &chan->flags);
+ break;
}
bacpy(&chan->src, &la.l2_bdaddr);
@@ -279,7 +278,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
break;
/* fall through */
default:
- err = -ENOTSUPP;
+ err = -EOPNOTSUPP;
goto done;
}
@@ -361,7 +360,8 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
BT_DBG("sock %p, sk %p", sock, sk);
if (peer && sk->sk_state != BT_CONNECTED &&
- sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2)
+ sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2 &&
+ sk->sk_state != BT_CONFIG)
return -ENOTCONN;
memset(la, 0, sizeof(struct sockaddr_l2));
@@ -789,6 +789,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
if (chan->scid == L2CAP_CID_ATT) {
if (smp_conn_security(conn->hcon, sec.level))
break;
+ set_bit(FLAG_PENDING_SECURITY, &chan->flags);
sk->sk_state = BT_CONFIG;
chan->state = BT_CONFIG;
@@ -796,7 +797,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
} else if ((sk->sk_state == BT_CONNECT2 &&
test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) ||
sk->sk_state == BT_CONNECTED) {
- if (!l2cap_chan_check_security(chan))
+ if (!l2cap_chan_check_security(chan, true))
set_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags);
else
sk->sk_state_change(sk);
@@ -964,7 +965,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
return err;
l2cap_chan_lock(chan);
- err = l2cap_chan_send(chan, msg, len, sk->sk_priority);
+ err = l2cap_chan_send(chan, msg, len);
l2cap_chan_unlock(chan);
return err;
@@ -1111,7 +1112,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
l2cap_chan_close(chan, 0);
lock_sock(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
err = bt_sock_wait_state(sk, BT_CLOSED,
sk->sk_lingertime);
}
@@ -1292,6 +1294,7 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state,
}
static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
+ unsigned long hdr_len,
unsigned long len, int nb)
{
struct sock *sk = chan->data;
@@ -1299,17 +1302,26 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
int err;
l2cap_chan_unlock(chan);
- skb = bt_skb_send_alloc(sk, len, nb, &err);
+ skb = bt_skb_send_alloc(sk, hdr_len + len, nb, &err);
l2cap_chan_lock(chan);
if (!skb)
return ERR_PTR(err);
+ skb->priority = sk->sk_priority;
+
bt_cb(skb)->chan = chan;
return skb;
}
+static int l2cap_sock_memcpy_fromiovec_cb(struct l2cap_chan *chan,
+ unsigned char *kdata,
+ struct iovec *iov, int len)
+{
+ return memcpy_fromiovec(kdata, iov, len);
+}
+
static void l2cap_sock_ready_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
@@ -1347,6 +1359,11 @@ static void l2cap_sock_resume_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (test_and_clear_bit(FLAG_PENDING_SECURITY, &chan->flags)) {
+ sk->sk_state = BT_CONNECTED;
+ chan->state = BT_CONNECTED;
+ }
+
clear_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags);
sk->sk_state_change(sk);
}
@@ -1375,20 +1392,21 @@ static void l2cap_sock_suspend_cb(struct l2cap_chan *chan)
sk->sk_state_change(sk);
}
-static struct l2cap_ops l2cap_chan_ops = {
- .name = "L2CAP Socket Interface",
- .new_connection = l2cap_sock_new_connection_cb,
- .recv = l2cap_sock_recv_cb,
- .close = l2cap_sock_close_cb,
- .teardown = l2cap_sock_teardown_cb,
- .state_change = l2cap_sock_state_change_cb,
- .ready = l2cap_sock_ready_cb,
- .defer = l2cap_sock_defer_cb,
- .resume = l2cap_sock_resume_cb,
- .suspend = l2cap_sock_suspend_cb,
- .set_shutdown = l2cap_sock_set_shutdown_cb,
- .get_sndtimeo = l2cap_sock_get_sndtimeo_cb,
- .alloc_skb = l2cap_sock_alloc_skb_cb,
+static const struct l2cap_ops l2cap_chan_ops = {
+ .name = "L2CAP Socket Interface",
+ .new_connection = l2cap_sock_new_connection_cb,
+ .recv = l2cap_sock_recv_cb,
+ .close = l2cap_sock_close_cb,
+ .teardown = l2cap_sock_teardown_cb,
+ .state_change = l2cap_sock_state_change_cb,
+ .ready = l2cap_sock_ready_cb,
+ .defer = l2cap_sock_defer_cb,
+ .resume = l2cap_sock_resume_cb,
+ .suspend = l2cap_sock_suspend_cb,
+ .set_shutdown = l2cap_sock_set_shutdown_cb,
+ .get_sndtimeo = l2cap_sock_get_sndtimeo_cb,
+ .alloc_skb = l2cap_sock_alloc_skb_cb,
+ .memcpy_fromiovec = l2cap_sock_memcpy_fromiovec_cb,
};
static void l2cap_sock_destruct(struct sock *sk)
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index 941ad7530eda..b36bc0415854 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -135,40 +135,34 @@ int bt_to_errno(__u16 code)
}
EXPORT_SYMBOL(bt_to_errno);
-int bt_info(const char *format, ...)
+void bt_info(const char *format, ...)
{
struct va_format vaf;
va_list args;
- int r;
va_start(args, format);
vaf.fmt = format;
vaf.va = &args;
- r = pr_info("%pV", &vaf);
+ pr_info("%pV", &vaf);
va_end(args);
-
- return r;
}
EXPORT_SYMBOL(bt_info);
-int bt_err(const char *format, ...)
+void bt_err(const char *format, ...)
{
struct va_format vaf;
va_list args;
- int r;
va_start(args, format);
vaf.fmt = format;
vaf.va = &args;
- r = pr_err("%pV", &vaf);
+ pr_err("%pV", &vaf);
va_end(args);
-
- return r;
}
EXPORT_SYMBOL(bt_err);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index af8e0a6243b7..efb71b022ab6 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -35,7 +35,7 @@
#include "smp.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 6
+#define MGMT_REVISION 7
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -44,7 +44,7 @@ static const u16 mgmt_commands[] = {
MGMT_OP_SET_DISCOVERABLE,
MGMT_OP_SET_CONNECTABLE,
MGMT_OP_SET_FAST_CONNECTABLE,
- MGMT_OP_SET_PAIRABLE,
+ MGMT_OP_SET_BONDABLE,
MGMT_OP_SET_LINK_SECURITY,
MGMT_OP_SET_SSP,
MGMT_OP_SET_HS,
@@ -85,6 +85,14 @@ static const u16 mgmt_commands[] = {
MGMT_OP_SET_PRIVACY,
MGMT_OP_LOAD_IRKS,
MGMT_OP_GET_CONN_INFO,
+ MGMT_OP_GET_CLOCK_INFO,
+ MGMT_OP_ADD_DEVICE,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_OP_LOAD_CONN_PARAM,
+ MGMT_OP_READ_UNCONF_INDEX_LIST,
+ MGMT_OP_READ_CONFIG_INFO,
+ MGMT_OP_SET_EXTERNAL_CONFIG,
+ MGMT_OP_SET_PUBLIC_ADDRESS,
};
static const u16 mgmt_events[] = {
@@ -111,13 +119,16 @@ static const u16 mgmt_events[] = {
MGMT_EV_PASSKEY_NOTIFY,
MGMT_EV_NEW_IRK,
MGMT_EV_NEW_CSRK,
+ MGMT_EV_DEVICE_ADDED,
+ MGMT_EV_DEVICE_REMOVED,
+ MGMT_EV_NEW_CONN_PARAM,
+ MGMT_EV_UNCONF_INDEX_ADDED,
+ MGMT_EV_UNCONF_INDEX_REMOVED,
+ MGMT_EV_NEW_CONFIG_OPTIONS,
};
#define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000)
-#define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \
- !test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
-
struct pending_cmd {
struct list_head list;
u16 opcode;
@@ -200,6 +211,36 @@ static u8 mgmt_status(u8 hci_status)
return MGMT_STATUS_FAILED;
}
+static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len,
+ struct sock *skip_sk)
+{
+ struct sk_buff *skb;
+ struct mgmt_hdr *hdr;
+
+ skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = (void *) skb_put(skb, sizeof(*hdr));
+ hdr->opcode = cpu_to_le16(event);
+ if (hdev)
+ hdr->index = cpu_to_le16(hdev->id);
+ else
+ hdr->index = cpu_to_le16(MGMT_INDEX_NONE);
+ hdr->len = cpu_to_le16(data_len);
+
+ if (data)
+ memcpy(skb_put(skb, data_len), data, data_len);
+
+ /* Time stamp */
+ __net_timestamp(skb);
+
+ hci_send_to_control(skb, skip_sk);
+ kfree_skb(skb);
+
+ return 0;
+}
+
static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status)
{
struct sk_buff *skb;
@@ -327,7 +368,8 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (d->dev_type == HCI_BREDR)
+ if (d->dev_type == HCI_BREDR &&
+ !test_bit(HCI_UNCONFIGURED, &d->dev_flags))
count++;
}
@@ -340,13 +382,19 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
count = 0;
list_for_each_entry(d, &hci_dev_list, list) {
- if (test_bit(HCI_SETUP, &d->dev_flags))
+ if (test_bit(HCI_SETUP, &d->dev_flags) ||
+ test_bit(HCI_CONFIG, &d->dev_flags) ||
+ test_bit(HCI_USER_CHANNEL, &d->dev_flags))
continue;
- if (test_bit(HCI_USER_CHANNEL, &d->dev_flags))
+ /* Devices marked as raw-only are neither configured
+ * nor unconfigured controllers.
+ */
+ if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
continue;
- if (d->dev_type == HCI_BREDR) {
+ if (d->dev_type == HCI_BREDR &&
+ !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) {
rp->index[count++] = cpu_to_le16(d->id);
BT_DBG("Added hci%u", d->id);
}
@@ -365,19 +413,151 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
return err;
}
+static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 data_len)
+{
+ struct mgmt_rp_read_unconf_index_list *rp;
+ struct hci_dev *d;
+ size_t rp_len;
+ u16 count;
+ int err;
+
+ BT_DBG("sock %p", sk);
+
+ read_lock(&hci_dev_list_lock);
+
+ count = 0;
+ list_for_each_entry(d, &hci_dev_list, list) {
+ if (d->dev_type == HCI_BREDR &&
+ test_bit(HCI_UNCONFIGURED, &d->dev_flags))
+ count++;
+ }
+
+ rp_len = sizeof(*rp) + (2 * count);
+ rp = kmalloc(rp_len, GFP_ATOMIC);
+ if (!rp) {
+ read_unlock(&hci_dev_list_lock);
+ return -ENOMEM;
+ }
+
+ count = 0;
+ list_for_each_entry(d, &hci_dev_list, list) {
+ if (test_bit(HCI_SETUP, &d->dev_flags) ||
+ test_bit(HCI_CONFIG, &d->dev_flags) ||
+ test_bit(HCI_USER_CHANNEL, &d->dev_flags))
+ continue;
+
+ /* Devices marked as raw-only are neither configured
+ * nor unconfigured controllers.
+ */
+ if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+ continue;
+
+ if (d->dev_type == HCI_BREDR &&
+ test_bit(HCI_UNCONFIGURED, &d->dev_flags)) {
+ rp->index[count++] = cpu_to_le16(d->id);
+ BT_DBG("Added hci%u", d->id);
+ }
+ }
+
+ rp->num_controllers = cpu_to_le16(count);
+ rp_len = sizeof(*rp) + (2 * count);
+
+ read_unlock(&hci_dev_list_lock);
+
+ err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_UNCONF_INDEX_LIST,
+ 0, rp, rp_len);
+
+ kfree(rp);
+
+ return err;
+}
+
+static bool is_configured(struct hci_dev *hdev)
+{
+ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
+ !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags))
+ return false;
+
+ if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
+ !bacmp(&hdev->public_addr, BDADDR_ANY))
+ return false;
+
+ return true;
+}
+
+static __le32 get_missing_options(struct hci_dev *hdev)
+{
+ u32 options = 0;
+
+ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
+ !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags))
+ options |= MGMT_OPTION_EXTERNAL_CONFIG;
+
+ if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
+ !bacmp(&hdev->public_addr, BDADDR_ANY))
+ options |= MGMT_OPTION_PUBLIC_ADDRESS;
+
+ return cpu_to_le32(options);
+}
+
+static int new_options(struct hci_dev *hdev, struct sock *skip)
+{
+ __le32 options = get_missing_options(hdev);
+
+ return mgmt_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options,
+ sizeof(options), skip);
+}
+
+static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev)
+{
+ __le32 options = get_missing_options(hdev);
+
+ return cmd_complete(sk, hdev->id, opcode, 0, &options,
+ sizeof(options));
+}
+
+static int read_config_info(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 data_len)
+{
+ struct mgmt_rp_read_config_info rp;
+ u32 options = 0;
+
+ BT_DBG("sock %p %s", sk, hdev->name);
+
+ hci_dev_lock(hdev);
+
+ memset(&rp, 0, sizeof(rp));
+ rp.manufacturer = cpu_to_le16(hdev->manufacturer);
+
+ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks))
+ options |= MGMT_OPTION_EXTERNAL_CONFIG;
+
+ if (hdev->set_bdaddr)
+ options |= MGMT_OPTION_PUBLIC_ADDRESS;
+
+ rp.supported_options = cpu_to_le32(options);
+ rp.missing_options = get_missing_options(hdev);
+
+ hci_dev_unlock(hdev);
+
+ return cmd_complete(sk, hdev->id, MGMT_OP_READ_CONFIG_INFO, 0, &rp,
+ sizeof(rp));
+}
+
static u32 get_supported_settings(struct hci_dev *hdev)
{
u32 settings = 0;
settings |= MGMT_SETTING_POWERED;
- settings |= MGMT_SETTING_PAIRABLE;
+ settings |= MGMT_SETTING_BONDABLE;
settings |= MGMT_SETTING_DEBUG_KEYS;
+ settings |= MGMT_SETTING_CONNECTABLE;
+ settings |= MGMT_SETTING_DISCOVERABLE;
if (lmp_bredr_capable(hdev)) {
- settings |= MGMT_SETTING_CONNECTABLE;
if (hdev->hci_ver >= BLUETOOTH_VER_1_2)
settings |= MGMT_SETTING_FAST_CONNECTABLE;
- settings |= MGMT_SETTING_DISCOVERABLE;
settings |= MGMT_SETTING_BREDR;
settings |= MGMT_SETTING_LINK_SECURITY;
@@ -387,7 +567,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
}
if (lmp_sc_capable(hdev) ||
- test_bit(HCI_FORCE_SC, &hdev->dev_flags))
+ test_bit(HCI_FORCE_SC, &hdev->dbg_flags))
settings |= MGMT_SETTING_SECURE_CONN;
}
@@ -397,6 +577,10 @@ static u32 get_supported_settings(struct hci_dev *hdev)
settings |= MGMT_SETTING_PRIVACY;
}
+ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
+ hdev->set_bdaddr)
+ settings |= MGMT_SETTING_CONFIGURATION;
+
return settings;
}
@@ -416,8 +600,8 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
settings |= MGMT_SETTING_DISCOVERABLE;
- if (test_bit(HCI_PAIRABLE, &hdev->dev_flags))
- settings |= MGMT_SETTING_PAIRABLE;
+ if (test_bit(HCI_BONDABLE, &hdev->dev_flags))
+ settings |= MGMT_SETTING_BONDABLE;
if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
settings |= MGMT_SETTING_BREDR;
@@ -440,7 +624,7 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags))
settings |= MGMT_SETTING_SECURE_CONN;
- if (test_bit(HCI_DEBUG_KEYS, &hdev->dev_flags))
+ if (test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags))
settings |= MGMT_SETTING_DEBUG_KEYS;
if (test_bit(HCI_PRIVACY, &hdev->dev_flags))
@@ -571,6 +755,22 @@ static struct pending_cmd *mgmt_pending_find(u16 opcode, struct hci_dev *hdev)
return NULL;
}
+static struct pending_cmd *mgmt_pending_find_data(u16 opcode,
+ struct hci_dev *hdev,
+ const void *data)
+{
+ struct pending_cmd *cmd;
+
+ list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+ if (cmd->user_data != data)
+ continue;
+ if (cmd->opcode == opcode)
+ return cmd;
+ }
+
+ return NULL;
+}
+
static u8 create_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
{
u8 ad_len = 0;
@@ -703,6 +903,16 @@ static void update_adv_data(struct hci_request *req)
hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
}
+int mgmt_update_adv_data(struct hci_dev *hdev)
+{
+ struct hci_request req;
+
+ hci_req_init(&req, hdev);
+ update_adv_data(&req);
+
+ return hci_req_run(&req, NULL);
+}
+
static void create_eir(struct hci_dev *hdev, u8 *data)
{
u8 *ptr = data;
@@ -836,6 +1046,13 @@ static bool get_connectable(struct hci_dev *hdev)
return test_bit(HCI_CONNECTABLE, &hdev->dev_flags);
}
+static void disable_advertising(struct hci_request *req)
+{
+ u8 enable = 0x00;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+}
+
static void enable_advertising(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
@@ -843,12 +1060,18 @@ static void enable_advertising(struct hci_request *req)
u8 own_addr_type, enable = 0x01;
bool connectable;
- /* Clear the HCI_ADVERTISING bit temporarily so that the
+ if (hci_conn_num(hdev, LE_LINK) > 0)
+ return;
+
+ if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
+ disable_advertising(req);
+
+ /* Clear the HCI_LE_ADV bit temporarily so that the
* hci_update_random_address knows that it's safe to go ahead
* and write a new random address. The flag will be set back on
* as soon as the SET_ADV_ENABLE HCI command completes.
*/
- clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+ clear_bit(HCI_LE_ADV, &hdev->dev_flags);
connectable = get_connectable(hdev);
@@ -860,8 +1083,8 @@ static void enable_advertising(struct hci_request *req)
return;
memset(&cp, 0, sizeof(cp));
- cp.min_interval = cpu_to_le16(0x0800);
- cp.max_interval = cpu_to_le16(0x0800);
+ cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval);
+ cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval);
cp.type = connectable ? LE_ADV_IND : LE_ADV_NONCONN_IND;
cp.own_address_type = own_addr_type;
cp.channel_map = hdev->le_adv_channel_map;
@@ -871,13 +1094,6 @@ static void enable_advertising(struct hci_request *req)
hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
}
-static void disable_advertising(struct hci_request *req)
-{
- u8 enable = 0x00;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
static void service_cache_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -909,19 +1125,14 @@ static void rpa_expired(struct work_struct *work)
set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
- if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags) ||
- hci_conn_num(hdev, LE_LINK) > 0)
+ if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags))
return;
/* The generation of a new RPA and programming it into the
* controller happens in the enable_advertising() function.
*/
-
hci_req_init(&req, hdev);
-
- disable_advertising(&req);
enable_advertising(&req);
-
hci_req_run(&req, NULL);
}
@@ -938,7 +1149,7 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
* for mgmt we require user-space to explicitly enable
* it
*/
- clear_bit(HCI_PAIRABLE, &hdev->dev_flags);
+ clear_bit(HCI_BONDABLE, &hdev->dev_flags);
}
static int read_controller_info(struct sock *sk, struct hci_dev *hdev,
@@ -984,7 +1195,7 @@ static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
{
struct pending_cmd *cmd;
- cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
if (!cmd)
return NULL;
@@ -1047,7 +1258,7 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status)
}
}
-static void hci_stop_discovery(struct hci_request *req)
+static bool hci_stop_discovery(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
struct hci_cp_remote_name_req_cancel cp;
@@ -1062,32 +1273,39 @@ static void hci_stop_discovery(struct hci_request *req)
hci_req_add_le_scan_disable(req);
}
- break;
+ return true;
case DISCOVERY_RESOLVING:
e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
NAME_PENDING);
if (!e)
- return;
+ break;
bacpy(&cp.bdaddr, &e->data.bdaddr);
hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
&cp);
- break;
+ return true;
default:
/* Passive scanning */
- if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
hci_req_add_le_scan_disable(req);
+ return true;
+ }
+
break;
}
+
+ return false;
}
static int clean_up_hci_state(struct hci_dev *hdev)
{
struct hci_request req;
struct hci_conn *conn;
+ bool discov_stopped;
+ int err;
hci_req_init(&req, hdev);
@@ -1097,10 +1315,10 @@ static int clean_up_hci_state(struct hci_dev *hdev)
hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
- if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+ if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
disable_advertising(&req);
- hci_stop_discovery(&req);
+ discov_stopped = hci_stop_discovery(&req);
list_for_each_entry(conn, &hdev->conn_hash.list, list) {
struct hci_cp_disconnect dc;
@@ -1134,7 +1352,11 @@ static int clean_up_hci_state(struct hci_dev *hdev)
}
}
- return hci_req_run(&req, clean_up_hci_complete);
+ err = hci_req_run(&req, clean_up_hci_complete);
+ if (!err && discov_stopped)
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
+
+ return err;
}
static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -1203,36 +1425,6 @@ failed:
return err;
}
-static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len,
- struct sock *skip_sk)
-{
- struct sk_buff *skb;
- struct mgmt_hdr *hdr;
-
- skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
-
- hdr = (void *) skb_put(skb, sizeof(*hdr));
- hdr->opcode = cpu_to_le16(event);
- if (hdev)
- hdr->index = cpu_to_le16(hdev->id);
- else
- hdr->index = cpu_to_le16(MGMT_INDEX_NONE);
- hdr->len = cpu_to_le16(data_len);
-
- if (data)
- memcpy(skb_put(skb, data_len), data, data_len);
-
- /* Time stamp */
- __net_timestamp(skb);
-
- hci_send_to_control(skb, skip_sk);
- kfree_skb(skb);
-
- return 0;
-}
-
static int new_settings(struct hci_dev *hdev, struct sock *skip)
{
__le32 ev;
@@ -1242,6 +1434,11 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip)
return mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), skip);
}
+int mgmt_new_settings(struct hci_dev *hdev)
+{
+ return new_settings(hdev, NULL);
+}
+
struct cmd_lookup {
struct sock *sk;
struct hci_dev *hdev;
@@ -1336,9 +1533,11 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status)
/* When the discoverable mode gets changed, make sure
* that class of device has the limited discoverable
- * bit correctly set.
+ * bit correctly set. Also update page scan based on whitelist
+ * entries.
*/
hci_req_init(&req, hdev);
+ hci_update_page_scan(hdev, &req);
update_class(&req);
hci_req_run(&req, NULL);
@@ -1553,7 +1752,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status)
{
struct pending_cmd *cmd;
struct mgmt_mode *cp;
- bool changed;
+ bool conn_changed, discov_changed;
BT_DBG("status 0x%02x", status);
@@ -1570,15 +1769,26 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status)
}
cp = cmd->param;
- if (cp->val)
- changed = !test_and_set_bit(HCI_CONNECTABLE, &hdev->dev_flags);
- else
- changed = test_and_clear_bit(HCI_CONNECTABLE, &hdev->dev_flags);
+ if (cp->val) {
+ conn_changed = !test_and_set_bit(HCI_CONNECTABLE,
+ &hdev->dev_flags);
+ discov_changed = false;
+ } else {
+ conn_changed = test_and_clear_bit(HCI_CONNECTABLE,
+ &hdev->dev_flags);
+ discov_changed = test_and_clear_bit(HCI_DISCOVERABLE,
+ &hdev->dev_flags);
+ }
send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev);
- if (changed)
+ if (conn_changed || discov_changed) {
new_settings(hdev, cmd->sk);
+ hci_update_page_scan(hdev, NULL);
+ if (discov_changed)
+ mgmt_update_adv_data(hdev);
+ hci_update_background_scan(hdev);
+ }
remove_cmd:
mgmt_pending_remove(cmd);
@@ -1607,8 +1817,11 @@ static int set_connectable_update_settings(struct hci_dev *hdev,
if (err < 0)
return err;
- if (changed)
+ if (changed) {
+ hci_update_page_scan(hdev, NULL);
+ hci_update_background_scan(hdev);
return new_settings(hdev, sk);
+ }
return 0;
}
@@ -1669,7 +1882,18 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
if (cp->val) {
scan = SCAN_PAGE;
} else {
- scan = 0;
+ /* If we don't have any whitelist entries just
+ * disable all scanning. If there are entries
+ * and we had both page and inquiry scanning
+ * enabled then fall back to only page scanning.
+ * Otherwise no changes are needed.
+ */
+ if (list_empty(&hdev->whitelist))
+ scan = SCAN_DISABLED;
+ else if (test_bit(HCI_ISCAN, &hdev->flags))
+ scan = SCAN_PAGE;
+ else
+ goto no_scan_update;
if (test_bit(HCI_ISCAN, &hdev->flags) &&
hdev->discov_timeout > 0)
@@ -1679,6 +1903,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
+no_scan_update:
/* If we're going from non-connectable to connectable or
* vice-versa when fast connectable is enabled ensure that fast
* connectable gets disabled. write_fast_connectable won't do
@@ -1688,11 +1913,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
write_fast_connectable(&req, false);
- if (test_bit(HCI_ADVERTISING, &hdev->dev_flags) &&
- hci_conn_num(hdev, LE_LINK) == 0) {
- disable_advertising(&req);
+ /* Update the advertising parameters if necessary */
+ if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
enable_advertising(&req);
- }
err = hci_req_run(&req, set_connectable_complete);
if (err < 0) {
@@ -1708,7 +1931,7 @@ failed:
return err;
}
-static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data,
+static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_mode *cp = data;
@@ -1718,17 +1941,17 @@ static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data,
BT_DBG("request for %s", hdev->name);
if (cp->val != 0x00 && cp->val != 0x01)
- return cmd_status(sk, hdev->id, MGMT_OP_SET_PAIRABLE,
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE,
MGMT_STATUS_INVALID_PARAMS);
hci_dev_lock(hdev);
if (cp->val)
- changed = !test_and_set_bit(HCI_PAIRABLE, &hdev->dev_flags);
+ changed = !test_and_set_bit(HCI_BONDABLE, &hdev->dev_flags);
else
- changed = test_and_clear_bit(HCI_PAIRABLE, &hdev->dev_flags);
+ changed = test_and_clear_bit(HCI_BONDABLE, &hdev->dev_flags);
- err = send_settings_rsp(sk, MGMT_OP_SET_PAIRABLE, hdev);
+ err = send_settings_rsp(sk, MGMT_OP_SET_BONDABLE, hdev);
if (err < 0)
goto unlock;
@@ -1877,6 +2100,10 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
goto failed;
}
+ if (!cp->val && test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags))
+ hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE,
+ sizeof(cp->val), &cp->val);
+
err = hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &cp->val);
if (err < 0) {
mgmt_pending_remove(cmd);
@@ -1973,6 +2200,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status)
update_scan_rsp_data(&req);
hci_req_run(&req, NULL);
+ hci_update_background_scan(hdev);
+
hci_dev_unlock(hdev);
}
}
@@ -2048,9 +2277,9 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
if (val) {
hci_cp.le = val;
- hci_cp.simul = lmp_le_br_capable(hdev);
+ hci_cp.simul = 0x00;
} else {
- if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+ if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
disable_advertising(&req);
}
@@ -2373,6 +2602,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_load_link_keys *cp = data;
+ const u16 max_key_count = ((U16_MAX - sizeof(*cp)) /
+ sizeof(struct mgmt_link_key_info));
u16 key_count, expected_len;
bool changed;
int i;
@@ -2384,6 +2615,12 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
MGMT_STATUS_NOT_SUPPORTED);
key_count = __le16_to_cpu(cp->key_count);
+ if (key_count > max_key_count) {
+ BT_ERR("load_link_keys: too big key_count value %u",
+ key_count);
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
expected_len = sizeof(*cp) + key_count *
sizeof(struct mgmt_link_key_info);
@@ -2414,9 +2651,11 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
hci_link_keys_clear(hdev);
if (cp->debug_keys)
- changed = !test_and_set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+ changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS,
+ &hdev->dev_flags);
else
- changed = test_and_clear_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+ changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS,
+ &hdev->dev_flags);
if (changed)
new_settings(hdev, NULL);
@@ -2424,8 +2663,14 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
for (i = 0; i < key_count; i++) {
struct mgmt_link_key_info *key = &cp->keys[i];
- hci_add_link_key(hdev, NULL, 0, &key->addr.bdaddr, key->val,
- key->type, key->pin_len);
+ /* Always ignore debug keys and require a new pairing if
+ * the user wants to use them.
+ */
+ if (key->type == HCI_LK_DEBUG_COMBINATION)
+ continue;
+
+ hci_add_link_key(hdev, NULL, &key->addr.bdaddr, key->val,
+ key->type, key->pin_len, NULL);
}
cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 0, NULL, 0);
@@ -2543,7 +2788,6 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_disconnect *cp = data;
struct mgmt_rp_disconnect rp;
- struct hci_cp_disconnect dc;
struct pending_cmd *cmd;
struct hci_conn *conn;
int err;
@@ -2591,10 +2835,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- dc.handle = cpu_to_le16(conn->handle);
- dc.reason = HCI_ERROR_REMOTE_USER_TERM;
-
- err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc);
+ err = hci_disconnect(conn, HCI_ERROR_REMOTE_USER_TERM);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -2766,6 +3007,10 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data,
BT_DBG("");
+ if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY)
+ return cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY,
+ MGMT_STATUS_INVALID_PARAMS, NULL, 0);
+
hci_dev_lock(hdev);
hdev->io_capability = cp->io_capability;
@@ -2814,6 +3059,7 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)
conn->disconn_cfm_cb = NULL;
hci_conn_drop(conn);
+ hci_conn_put(conn);
mgmt_pending_remove(cmd);
}
@@ -2878,6 +3124,11 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
MGMT_STATUS_INVALID_PARAMS,
&rp, sizeof(rp));
+ if (cp->io_cap > SMP_IO_KEYBOARD_DISPLAY)
+ return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &rp, sizeof(rp));
+
hci_dev_lock(hdev);
if (!hdev_is_powered(hdev)) {
@@ -2902,8 +3153,20 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
else
addr_type = ADDR_LE_DEV_RANDOM;
+ /* When pairing a new device, it is expected to remember
+ * this device for future connections. Adding the connection
+ * parameter information ahead of time allows tracking
+ * of the slave preferred values and will speed up any
+ * further connection establishment.
+ *
+ * If connection parameters already exist, then they
+ * will be kept and this function does nothing.
+ */
+ hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type);
+
conn = hci_connect_le(hdev, &cp->addr.bdaddr, addr_type,
- sec_level, auth_type);
+ sec_level, HCI_LE_CONN_TIMEOUT,
+ HCI_ROLE_MASTER);
}
if (IS_ERR(conn)) {
@@ -2946,10 +3209,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
}
conn->io_capability = cp->io_cap;
- cmd->user_data = conn;
+ cmd->user_data = hci_conn_get(conn);
- if (conn->state == BT_CONNECTED &&
- hci_conn_security(conn, sec_level, auth_type))
+ if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) &&
+ hci_conn_security(conn, sec_level, auth_type, true))
pairing_complete(cmd, 0);
err = 0;
@@ -3031,14 +3294,7 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
}
if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
- /* Continue with pairing via SMP. The hdev lock must be
- * released as SMP may try to recquire it for crypto
- * purposes.
- */
- hci_dev_unlock(hdev);
err = smp_user_confirm_reply(conn, mgmt_op, passkey);
- hci_dev_lock(hdev);
-
if (!err)
err = cmd_complete(sk, hdev->id, mgmt_op,
MGMT_STATUS_SUCCESS, addr,
@@ -3516,11 +3772,21 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
- if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
- err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
- MGMT_STATUS_REJECTED);
- mgmt_pending_remove(cmd);
- goto failed;
+ if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) {
+ /* Don't let discovery abort an outgoing
+ * connection attempt that's using directed
+ * advertising.
+ */
+ if (hci_conn_hash_lookup_state(hdev, LE_LINK,
+ BT_CONNECT)) {
+ err = cmd_status(sk, hdev->id,
+ MGMT_OP_START_DISCOVERY,
+ MGMT_STATUS_REJECTED);
+ mgmt_pending_remove(cmd);
+ goto failed;
+ }
+
+ disable_advertising(&req);
}
/* If controller is scanning, it means the background scanning
@@ -3723,12 +3989,18 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- err = hci_blacklist_add(hdev, &cp->addr.bdaddr, cp->addr.type);
- if (err < 0)
+ err = hci_bdaddr_list_add(&hdev->blacklist, &cp->addr.bdaddr,
+ cp->addr.type);
+ if (err < 0) {
status = MGMT_STATUS_FAILED;
- else
- status = MGMT_STATUS_SUCCESS;
+ goto done;
+ }
+
+ mgmt_event(MGMT_EV_DEVICE_BLOCKED, hdev, &cp->addr, sizeof(cp->addr),
+ sk);
+ status = MGMT_STATUS_SUCCESS;
+done:
err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status,
&cp->addr, sizeof(cp->addr));
@@ -3753,12 +4025,18 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- err = hci_blacklist_del(hdev, &cp->addr.bdaddr, cp->addr.type);
- if (err < 0)
+ err = hci_bdaddr_list_del(&hdev->blacklist, &cp->addr.bdaddr,
+ cp->addr.type);
+ if (err < 0) {
status = MGMT_STATUS_INVALID_PARAMS;
- else
- status = MGMT_STATUS_SUCCESS;
+ goto done;
+ }
+ mgmt_event(MGMT_EV_DEVICE_UNBLOCKED, hdev, &cp->addr, sizeof(cp->addr),
+ sk);
+ status = MGMT_STATUS_SUCCESS;
+
+done:
err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status,
&cp->addr, sizeof(cp->addr));
@@ -3813,6 +4091,11 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status)
return;
}
+ if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
+ set_bit(HCI_ADVERTISING, &hdev->dev_flags);
+ else
+ clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+
mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp,
&match);
@@ -3853,7 +4136,9 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
* necessary).
*/
if (!hdev_is_powered(hdev) || val == enabled ||
- hci_conn_num(hdev, LE_LINK) > 0) {
+ hci_conn_num(hdev, LE_LINK) > 0 ||
+ (test_bit(HCI_LE_SCAN, &hdev->dev_flags) &&
+ hdev->le_scan_type == LE_SCAN_ACTIVE)) {
bool changed = false;
if (val != test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
@@ -4094,26 +4379,6 @@ unlock:
return err;
}
-static void set_bredr_scan(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- u8 scan = 0;
-
- /* Ensure that fast connectable is disabled. This function will
- * not do anything if the page scan parameters are already what
- * they should be.
- */
- write_fast_connectable(req, false);
-
- if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
- scan |= SCAN_PAGE;
- if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
- scan |= SCAN_INQUIRY;
-
- if (scan)
- hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
-}
-
static void set_bredr_complete(struct hci_dev *hdev, u8 status)
{
struct pending_cmd *cmd;
@@ -4219,8 +4484,8 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_req_init(&req, hdev);
- if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
- set_bredr_scan(&req);
+ write_fast_connectable(&req, false);
+ hci_update_page_scan(hdev, &req);
/* Since only the advertising data flags will change, there
* is no need to update the scan response data.
@@ -4252,7 +4517,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
status);
if (!lmp_sc_capable(hdev) &&
- !test_bit(HCI_FORCE_SC, &hdev->dev_flags))
+ !test_bit(HCI_FORCE_SC, &hdev->dbg_flags))
return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN,
MGMT_STATUS_NOT_SUPPORTED);
@@ -4328,21 +4593,37 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
struct mgmt_mode *cp = data;
- bool changed;
+ bool changed, use_changed;
int err;
BT_DBG("request for %s", hdev->name);
- if (cp->val != 0x00 && cp->val != 0x01)
+ if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02)
return cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS,
MGMT_STATUS_INVALID_PARAMS);
hci_dev_lock(hdev);
if (cp->val)
- changed = !test_and_set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+ changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS,
+ &hdev->dev_flags);
else
- changed = test_and_clear_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+ changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS,
+ &hdev->dev_flags);
+
+ if (cp->val == 0x02)
+ use_changed = !test_and_set_bit(HCI_USE_DEBUG_KEYS,
+ &hdev->dev_flags);
+ else
+ use_changed = test_and_clear_bit(HCI_USE_DEBUG_KEYS,
+ &hdev->dev_flags);
+
+ if (hdev_is_powered(hdev) && use_changed &&
+ test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+ u8 mode = (cp->val == 0x02) ? 0x01 : 0x00;
+ hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE,
+ sizeof(mode), &mode);
+ }
err = send_settings_rsp(sk, MGMT_OP_SET_DEBUG_KEYS, hdev);
if (err < 0)
@@ -4426,6 +4707,8 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
u16 len)
{
struct mgmt_cp_load_irks *cp = cp_data;
+ const u16 max_irk_count = ((U16_MAX - sizeof(*cp)) /
+ sizeof(struct mgmt_irk_info));
u16 irk_count, expected_len;
int i, err;
@@ -4436,6 +4719,11 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
MGMT_STATUS_NOT_SUPPORTED);
irk_count = __le16_to_cpu(cp->irk_count);
+ if (irk_count > max_irk_count) {
+ BT_ERR("load_irks: too big irk_count value %u", irk_count);
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
if (expected_len != len) {
@@ -4505,6 +4793,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
void *cp_data, u16 len)
{
struct mgmt_cp_load_long_term_keys *cp = cp_data;
+ const u16 max_key_count = ((U16_MAX - sizeof(*cp)) /
+ sizeof(struct mgmt_ltk_info));
u16 key_count, expected_len;
int i, err;
@@ -4515,6 +4805,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
MGMT_STATUS_NOT_SUPPORTED);
key_count = __le16_to_cpu(cp->key_count);
+ if (key_count > max_key_count) {
+ BT_ERR("load_ltks: too big key_count value %u", key_count);
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
expected_len = sizeof(*cp) + key_count *
sizeof(struct mgmt_ltk_info);
@@ -4550,9 +4845,9 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
addr_type = ADDR_LE_DEV_RANDOM;
if (key->master)
- type = HCI_SMP_LTK;
+ type = SMP_LTK;
else
- type = HCI_SMP_LTK_SLAVE;
+ type = SMP_LTK_SLAVE;
switch (key->type) {
case MGMT_LTK_UNAUTHENTICATED:
@@ -4616,6 +4911,7 @@ static void get_conn_info_complete(struct pending_cmd *cmd, void *data)
match->mgmt_status, &rp, sizeof(rp));
hci_conn_drop(conn);
+ hci_conn_put(conn);
mgmt_pending_remove(cmd);
}
@@ -4772,7 +5068,7 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
}
hci_conn_hold(conn);
- cmd->user_data = conn;
+ cmd->user_data = hci_conn_get(conn);
conn->conn_info_timestamp = jiffies;
} else {
@@ -4790,6 +5086,536 @@ unlock:
return err;
}
+static void get_clock_info_complete(struct hci_dev *hdev, u8 status)
+{
+ struct mgmt_cp_get_clock_info *cp;
+ struct mgmt_rp_get_clock_info rp;
+ struct hci_cp_read_clock *hci_cp;
+ struct pending_cmd *cmd;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status %u", hdev->name, status);
+
+ hci_dev_lock(hdev);
+
+ hci_cp = hci_sent_cmd_data(hdev, HCI_OP_READ_CLOCK);
+ if (!hci_cp)
+ goto unlock;
+
+ if (hci_cp->which) {
+ u16 handle = __le16_to_cpu(hci_cp->handle);
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ } else {
+ conn = NULL;
+ }
+
+ cmd = mgmt_pending_find_data(MGMT_OP_GET_CLOCK_INFO, hdev, conn);
+ if (!cmd)
+ goto unlock;
+
+ cp = cmd->param;
+
+ memset(&rp, 0, sizeof(rp));
+ memcpy(&rp.addr, &cp->addr, sizeof(rp.addr));
+
+ if (status)
+ goto send_rsp;
+
+ rp.local_clock = cpu_to_le32(hdev->clock);
+
+ if (conn) {
+ rp.piconet_clock = cpu_to_le32(conn->clock);
+ rp.accuracy = cpu_to_le16(conn->clock_accuracy);
+ }
+
+send_rsp:
+ cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status),
+ &rp, sizeof(rp));
+ mgmt_pending_remove(cmd);
+ if (conn) {
+ hci_conn_drop(conn);
+ hci_conn_put(conn);
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 len)
+{
+ struct mgmt_cp_get_clock_info *cp = data;
+ struct mgmt_rp_get_clock_info rp;
+ struct hci_cp_read_clock hci_cp;
+ struct pending_cmd *cmd;
+ struct hci_request req;
+ struct hci_conn *conn;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ memset(&rp, 0, sizeof(rp));
+ bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+ rp.addr.type = cp->addr.type;
+
+ if (cp->addr.type != BDADDR_BREDR)
+ return cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO,
+ MGMT_STATUS_INVALID_PARAMS,
+ &rp, sizeof(rp));
+
+ hci_dev_lock(hdev);
+
+ if (!hdev_is_powered(hdev)) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO,
+ MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp));
+ goto unlock;
+ }
+
+ if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) {
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+ &cp->addr.bdaddr);
+ if (!conn || conn->state != BT_CONNECTED) {
+ err = cmd_complete(sk, hdev->id,
+ MGMT_OP_GET_CLOCK_INFO,
+ MGMT_STATUS_NOT_CONNECTED,
+ &rp, sizeof(rp));
+ goto unlock;
+ }
+ } else {
+ conn = NULL;
+ }
+
+ cmd = mgmt_pending_add(sk, MGMT_OP_GET_CLOCK_INFO, hdev, data, len);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ hci_req_init(&req, hdev);
+
+ memset(&hci_cp, 0, sizeof(hci_cp));
+ hci_req_add(&req, HCI_OP_READ_CLOCK, sizeof(hci_cp), &hci_cp);
+
+ if (conn) {
+ hci_conn_hold(conn);
+ cmd->user_data = hci_conn_get(conn);
+
+ hci_cp.handle = cpu_to_le16(conn->handle);
+ hci_cp.which = 0x01; /* Piconet clock */
+ hci_req_add(&req, HCI_OP_READ_CLOCK, sizeof(hci_cp), &hci_cp);
+ }
+
+ err = hci_req_run(&req, get_clock_info_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static void device_added(struct sock *sk, struct hci_dev *hdev,
+ bdaddr_t *bdaddr, u8 type, u8 action)
+{
+ struct mgmt_ev_device_added ev;
+
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = type;
+ ev.action = action;
+
+ mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk);
+}
+
+static int add_device(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_add_device *cp = data;
+ u8 auto_conn, addr_type;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!bdaddr_type_is_valid(cp->addr.type) ||
+ !bacmp(&cp->addr.bdaddr, BDADDR_ANY))
+ return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+
+ if (cp->action != 0x00 && cp->action != 0x01 && cp->action != 0x02)
+ return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+
+ hci_dev_lock(hdev);
+
+ if (cp->addr.type == BDADDR_BREDR) {
+ /* Only incoming connections action is supported for now */
+ if (cp->action != 0x01) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+ goto unlock;
+ }
+
+ err = hci_bdaddr_list_add(&hdev->whitelist, &cp->addr.bdaddr,
+ cp->addr.type);
+ if (err)
+ goto unlock;
+
+ hci_update_page_scan(hdev, NULL);
+
+ goto added;
+ }
+
+ if (cp->addr.type == BDADDR_LE_PUBLIC)
+ addr_type = ADDR_LE_DEV_PUBLIC;
+ else
+ addr_type = ADDR_LE_DEV_RANDOM;
+
+ if (cp->action == 0x02)
+ auto_conn = HCI_AUTO_CONN_ALWAYS;
+ else if (cp->action == 0x01)
+ auto_conn = HCI_AUTO_CONN_DIRECT;
+ else
+ auto_conn = HCI_AUTO_CONN_REPORT;
+
+ /* If the connection parameters don't exist for this device,
+ * they will be created and configured with defaults.
+ */
+ if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type,
+ auto_conn) < 0) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_FAILED,
+ &cp->addr, sizeof(cp->addr));
+ goto unlock;
+ }
+
+added:
+ device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action);
+
+ err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr));
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static void device_removed(struct sock *sk, struct hci_dev *hdev,
+ bdaddr_t *bdaddr, u8 type)
+{
+ struct mgmt_ev_device_removed ev;
+
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = type;
+
+ mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk);
+}
+
+static int remove_device(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_remove_device *cp = data;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ hci_dev_lock(hdev);
+
+ if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) {
+ struct hci_conn_params *params;
+ u8 addr_type;
+
+ if (!bdaddr_type_is_valid(cp->addr.type)) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+ goto unlock;
+ }
+
+ if (cp->addr.type == BDADDR_BREDR) {
+ err = hci_bdaddr_list_del(&hdev->whitelist,
+ &cp->addr.bdaddr,
+ cp->addr.type);
+ if (err) {
+ err = cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+ goto unlock;
+ }
+
+ hci_update_page_scan(hdev, NULL);
+
+ device_removed(sk, hdev, &cp->addr.bdaddr,
+ cp->addr.type);
+ goto complete;
+ }
+
+ if (cp->addr.type == BDADDR_LE_PUBLIC)
+ addr_type = ADDR_LE_DEV_PUBLIC;
+ else
+ addr_type = ADDR_LE_DEV_RANDOM;
+
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ addr_type);
+ if (!params) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+ goto unlock;
+ }
+
+ if (params->auto_connect == HCI_AUTO_CONN_DISABLED) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+ goto unlock;
+ }
+
+ list_del(&params->action);
+ list_del(&params->list);
+ kfree(params);
+ hci_update_background_scan(hdev);
+
+ device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
+ } else {
+ struct hci_conn_params *p, *tmp;
+ struct bdaddr_list *b, *btmp;
+
+ if (cp->addr.type) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_INVALID_PARAMS,
+ &cp->addr, sizeof(cp->addr));
+ goto unlock;
+ }
+
+ list_for_each_entry_safe(b, btmp, &hdev->whitelist, list) {
+ device_removed(sk, hdev, &b->bdaddr, b->bdaddr_type);
+ list_del(&b->list);
+ kfree(b);
+ }
+
+ hci_update_page_scan(hdev, NULL);
+
+ list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) {
+ if (p->auto_connect == HCI_AUTO_CONN_DISABLED)
+ continue;
+ device_removed(sk, hdev, &p->addr, p->addr_type);
+ list_del(&p->action);
+ list_del(&p->list);
+ kfree(p);
+ }
+
+ BT_DBG("All LE connection parameters were removed");
+
+ hci_update_background_scan(hdev);
+ }
+
+complete:
+ err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE,
+ MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr));
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 len)
+{
+ struct mgmt_cp_load_conn_param *cp = data;
+ const u16 max_param_count = ((U16_MAX - sizeof(*cp)) /
+ sizeof(struct mgmt_conn_param));
+ u16 param_count, expected_len;
+ int i;
+
+ if (!lmp_le_capable(hdev))
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ param_count = __le16_to_cpu(cp->param_count);
+ if (param_count > max_param_count) {
+ BT_ERR("load_conn_param: too big param_count value %u",
+ param_count);
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
+
+ expected_len = sizeof(*cp) + param_count *
+ sizeof(struct mgmt_conn_param);
+ if (expected_len != len) {
+ BT_ERR("load_conn_param: expected %u bytes, got %u bytes",
+ expected_len, len);
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
+
+ BT_DBG("%s param_count %u", hdev->name, param_count);
+
+ hci_dev_lock(hdev);
+
+ hci_conn_params_clear_disabled(hdev);
+
+ for (i = 0; i < param_count; i++) {
+ struct mgmt_conn_param *param = &cp->params[i];
+ struct hci_conn_params *hci_param;
+ u16 min, max, latency, timeout;
+ u8 addr_type;
+
+ BT_DBG("Adding %pMR (type %u)", &param->addr.bdaddr,
+ param->addr.type);
+
+ if (param->addr.type == BDADDR_LE_PUBLIC) {
+ addr_type = ADDR_LE_DEV_PUBLIC;
+ } else if (param->addr.type == BDADDR_LE_RANDOM) {
+ addr_type = ADDR_LE_DEV_RANDOM;
+ } else {
+ BT_ERR("Ignoring invalid connection parameters");
+ continue;
+ }
+
+ min = le16_to_cpu(param->min_interval);
+ max = le16_to_cpu(param->max_interval);
+ latency = le16_to_cpu(param->latency);
+ timeout = le16_to_cpu(param->timeout);
+
+ BT_DBG("min 0x%04x max 0x%04x latency 0x%04x timeout 0x%04x",
+ min, max, latency, timeout);
+
+ if (hci_check_conn_params(min, max, latency, timeout) < 0) {
+ BT_ERR("Ignoring invalid connection parameters");
+ continue;
+ }
+
+ hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr,
+ addr_type);
+ if (!hci_param) {
+ BT_ERR("Failed to add connection parameters");
+ continue;
+ }
+
+ hci_param->conn_min_interval = min;
+ hci_param->conn_max_interval = max;
+ hci_param->conn_latency = latency;
+ hci_param->supervision_timeout = timeout;
+ }
+
+ hci_dev_unlock(hdev);
+
+ return cmd_complete(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 0, NULL, 0);
+}
+
+static int set_external_config(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_set_external_config *cp = data;
+ bool changed;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (hdev_is_powered(hdev))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
+ MGMT_STATUS_REJECTED);
+
+ if (cp->config != 0x00 && cp->config != 0x01)
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ hci_dev_lock(hdev);
+
+ if (cp->config)
+ changed = !test_and_set_bit(HCI_EXT_CONFIGURED,
+ &hdev->dev_flags);
+ else
+ changed = test_and_clear_bit(HCI_EXT_CONFIGURED,
+ &hdev->dev_flags);
+
+ err = send_options_rsp(sk, MGMT_OP_SET_EXTERNAL_CONFIG, hdev);
+ if (err < 0)
+ goto unlock;
+
+ if (!changed)
+ goto unlock;
+
+ err = new_options(hdev, sk);
+
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) == is_configured(hdev)) {
+ mgmt_index_removed(hdev);
+
+ if (test_and_change_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+ set_bit(HCI_CONFIG, &hdev->dev_flags);
+ set_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+
+ queue_work(hdev->req_workqueue, &hdev->power_on);
+ } else {
+ set_bit(HCI_RAW, &hdev->flags);
+ mgmt_index_added(hdev);
+ }
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static int set_public_address(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_set_public_address *cp = data;
+ bool changed;
+ int err;
+
+ BT_DBG("%s", hdev->name);
+
+ if (hdev_is_powered(hdev))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS,
+ MGMT_STATUS_REJECTED);
+
+ if (!bacmp(&cp->bdaddr, BDADDR_ANY))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ if (!hdev->set_bdaddr)
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ hci_dev_lock(hdev);
+
+ changed = !!bacmp(&hdev->public_addr, &cp->bdaddr);
+ bacpy(&hdev->public_addr, &cp->bdaddr);
+
+ err = send_options_rsp(sk, MGMT_OP_SET_PUBLIC_ADDRESS, hdev);
+ if (err < 0)
+ goto unlock;
+
+ if (!changed)
+ goto unlock;
+
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+ err = new_options(hdev, sk);
+
+ if (is_configured(hdev)) {
+ mgmt_index_removed(hdev);
+
+ clear_bit(HCI_UNCONFIGURED, &hdev->dev_flags);
+
+ set_bit(HCI_CONFIG, &hdev->dev_flags);
+ set_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+
+ queue_work(hdev->req_workqueue, &hdev->power_on);
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
static const struct mgmt_handler {
int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
u16 data_len);
@@ -4805,7 +5631,7 @@ static const struct mgmt_handler {
{ set_discoverable, false, MGMT_SET_DISCOVERABLE_SIZE },
{ set_connectable, false, MGMT_SETTING_SIZE },
{ set_fast_connectable, false, MGMT_SETTING_SIZE },
- { set_pairable, false, MGMT_SETTING_SIZE },
+ { set_bondable, false, MGMT_SETTING_SIZE },
{ set_link_security, false, MGMT_SETTING_SIZE },
{ set_ssp, false, MGMT_SETTING_SIZE },
{ set_hs, false, MGMT_SETTING_SIZE },
@@ -4846,9 +5672,16 @@ static const struct mgmt_handler {
{ set_privacy, false, MGMT_SET_PRIVACY_SIZE },
{ load_irks, true, MGMT_LOAD_IRKS_SIZE },
{ get_conn_info, false, MGMT_GET_CONN_INFO_SIZE },
+ { get_clock_info, false, MGMT_GET_CLOCK_INFO_SIZE },
+ { add_device, false, MGMT_ADD_DEVICE_SIZE },
+ { remove_device, false, MGMT_REMOVE_DEVICE_SIZE },
+ { load_conn_param, true, MGMT_LOAD_CONN_PARAM_SIZE },
+ { read_unconf_index_list, false, MGMT_READ_UNCONF_INDEX_LIST_SIZE },
+ { read_config_info, false, MGMT_READ_CONFIG_INFO_SIZE },
+ { set_external_config, false, MGMT_SET_EXTERNAL_CONFIG_SIZE },
+ { set_public_address, false, MGMT_SET_PUBLIC_ADDRESS_SIZE },
};
-
int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
{
void *buf;
@@ -4892,11 +5725,21 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
}
if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
+ test_bit(HCI_CONFIG, &hdev->dev_flags) ||
test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
err = cmd_status(sk, index, opcode,
MGMT_STATUS_INVALID_INDEX);
goto done;
}
+
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
+ opcode != MGMT_OP_READ_CONFIG_INFO &&
+ opcode != MGMT_OP_SET_EXTERNAL_CONFIG &&
+ opcode != MGMT_OP_SET_PUBLIC_ADDRESS) {
+ err = cmd_status(sk, index, opcode,
+ MGMT_STATUS_INVALID_INDEX);
+ goto done;
+ }
}
if (opcode >= ARRAY_SIZE(mgmt_handlers) ||
@@ -4907,8 +5750,15 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
goto done;
}
- if ((hdev && opcode < MGMT_OP_READ_INFO) ||
- (!hdev && opcode >= MGMT_OP_READ_INFO)) {
+ if (hdev && (opcode <= MGMT_OP_READ_INDEX_LIST ||
+ opcode == MGMT_OP_READ_UNCONF_INDEX_LIST)) {
+ err = cmd_status(sk, index, opcode,
+ MGMT_STATUS_INVALID_INDEX);
+ goto done;
+ }
+
+ if (!hdev && (opcode > MGMT_OP_READ_INDEX_LIST &&
+ opcode != MGMT_OP_READ_UNCONF_INDEX_LIST)) {
err = cmd_status(sk, index, opcode,
MGMT_STATUS_INVALID_INDEX);
goto done;
@@ -4947,7 +5797,13 @@ void mgmt_index_added(struct hci_dev *hdev)
if (hdev->dev_type != HCI_BREDR)
return;
- mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL);
+ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ return;
+
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+ mgmt_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, NULL);
+ else
+ mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL);
}
void mgmt_index_removed(struct hci_dev *hdev)
@@ -4957,20 +5813,42 @@ void mgmt_index_removed(struct hci_dev *hdev)
if (hdev->dev_type != HCI_BREDR)
return;
+ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ return;
+
mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status);
- mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
+ if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+ mgmt_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, NULL);
+ else
+ mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
}
/* This function requires the caller holds hdev->lock */
-static void restart_le_auto_conns(struct hci_dev *hdev)
+static void restart_le_actions(struct hci_dev *hdev)
{
struct hci_conn_params *p;
list_for_each_entry(p, &hdev->le_conn_params, list) {
- if (p->auto_connect == HCI_AUTO_CONN_ALWAYS)
- hci_pend_le_conn_add(hdev, &p->addr, p->addr_type);
+ /* Needed for AUTO_OFF case where might not "really"
+ * have been powered off.
+ */
+ list_del_init(&p->action);
+
+ switch (p->auto_connect) {
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+ list_add(&p->action, &hdev->pend_le_conns);
+ break;
+ case HCI_AUTO_CONN_REPORT:
+ list_add(&p->action, &hdev->pend_le_reports);
+ break;
+ default:
+ break;
+ }
}
+
+ hci_update_background_scan(hdev);
}
static void powered_complete(struct hci_dev *hdev, u8 status)
@@ -4981,7 +5859,7 @@ static void powered_complete(struct hci_dev *hdev, u8 status)
hci_dev_lock(hdev);
- restart_le_auto_conns(hdev);
+ restart_le_actions(hdev);
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
@@ -5011,8 +5889,8 @@ static int powered_update_hci(struct hci_dev *hdev)
lmp_bredr_capable(hdev)) {
struct hci_cp_write_le_host_supported cp;
- cp.le = 1;
- cp.simul = lmp_le_br_capable(hdev);
+ cp.le = 0x01;
+ cp.simul = 0x00;
/* Check first if we already have the right
* host state (host features set)
@@ -5043,8 +5921,8 @@ static int powered_update_hci(struct hci_dev *hdev)
sizeof(link_sec), &link_sec);
if (lmp_bredr_capable(hdev)) {
- if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
- set_bredr_scan(&req);
+ write_fast_connectable(&req, false);
+ hci_update_page_scan(hdev, &req);
update_class(&req);
update_name(&req);
update_eir(&req);
@@ -5138,92 +6016,6 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
-void mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
-{
- bool changed;
-
- /* Nothing needed here if there's a pending command since that
- * commands request completion callback takes care of everything
- * necessary.
- */
- if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
- return;
-
- /* Powering off may clear the scan mode - don't let that interfere */
- if (!discoverable && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
- return;
-
- if (discoverable) {
- changed = !test_and_set_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
- } else {
- clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
- changed = test_and_clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
- }
-
- if (changed) {
- struct hci_request req;
-
- /* In case this change in discoverable was triggered by
- * a disabling of connectable there could be a need to
- * update the advertising flags.
- */
- hci_req_init(&req, hdev);
- update_adv_data(&req);
- hci_req_run(&req, NULL);
-
- new_settings(hdev, NULL);
- }
-}
-
-void mgmt_connectable(struct hci_dev *hdev, u8 connectable)
-{
- bool changed;
-
- /* Nothing needed here if there's a pending command since that
- * commands request completion callback takes care of everything
- * necessary.
- */
- if (mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
- return;
-
- /* Powering off may clear the scan mode - don't let that interfere */
- if (!connectable && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
- return;
-
- if (connectable)
- changed = !test_and_set_bit(HCI_CONNECTABLE, &hdev->dev_flags);
- else
- changed = test_and_clear_bit(HCI_CONNECTABLE, &hdev->dev_flags);
-
- if (changed)
- new_settings(hdev, NULL);
-}
-
-void mgmt_advertising(struct hci_dev *hdev, u8 advertising)
-{
- /* Powering off may stop advertising - don't let that interfere */
- if (!advertising && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
- return;
-
- if (advertising)
- set_bit(HCI_ADVERTISING, &hdev->dev_flags);
- else
- clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
-}
-
-void mgmt_write_scan_failed(struct hci_dev *hdev, u8 scan, u8 status)
-{
- u8 mgmt_err = mgmt_status(status);
-
- if (scan & SCAN_PAGE)
- mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev,
- cmd_status_rsp, &mgmt_err);
-
- if (scan & SCAN_INQUIRY)
- mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, hdev,
- cmd_status_rsp, &mgmt_err);
-}
-
void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
bool persistent)
{
@@ -5279,7 +6071,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
ev.key.ediv = key->ediv;
ev.key.rand = key->rand;
- if (key->type == HCI_SMP_LTK)
+ if (key->type == SMP_LTK)
ev.key.master = 1;
memcpy(ev.key.val, key->val, sizeof(key->val));
@@ -5347,6 +6139,27 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL);
}
+void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 bdaddr_type, u8 store_hint, u16 min_interval,
+ u16 max_interval, u16 latency, u16 timeout)
+{
+ struct mgmt_ev_new_conn_param ev;
+
+ if (!hci_is_identity_address(bdaddr, bdaddr_type))
+ return;
+
+ memset(&ev, 0, sizeof(ev));
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = link_to_bdaddr(LE_LINK, bdaddr_type);
+ ev.store_hint = store_hint;
+ ev.min_interval = cpu_to_le16(min_interval);
+ ev.max_interval = cpu_to_le16(max_interval);
+ ev.latency = cpu_to_le16(latency);
+ ev.timeout = cpu_to_le16(timeout);
+
+ mgmt_event(MGMT_EV_NEW_CONN_PARAM, hdev, &ev, sizeof(ev), NULL);
+}
+
static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
u8 data_len)
{
@@ -5420,25 +6233,35 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data)
mgmt_pending_remove(cmd);
}
+bool mgmt_powering_down(struct hci_dev *hdev)
+{
+ struct pending_cmd *cmd;
+ struct mgmt_mode *cp;
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev);
+ if (!cmd)
+ return false;
+
+ cp = cmd->param;
+ if (!cp->val)
+ return true;
+
+ return false;
+}
+
void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
u8 link_type, u8 addr_type, u8 reason,
bool mgmt_connected)
{
struct mgmt_ev_device_disconnected ev;
- struct pending_cmd *power_off;
struct sock *sk = NULL;
- power_off = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev);
- if (power_off) {
- struct mgmt_mode *cp = power_off->param;
-
- /* The connection is still in hci_conn_hash so test for 1
- * instead of 0 to know if this is the last one.
- */
- if (!cp->val && hci_conn_count(hdev) == 1) {
- cancel_delayed_work(&hdev->power_off);
- queue_work(hdev->req_workqueue, &hdev->power_off.work);
- }
+ /* The connection is still in hci_conn_hash so test for 1
+ * instead of 0 to know if this is the last one.
+ */
+ if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) {
+ cancel_delayed_work(&hdev->power_off);
+ queue_work(hdev->req_workqueue, &hdev->power_off.work);
}
if (!mgmt_connected)
@@ -5498,19 +6321,13 @@ void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 status)
{
struct mgmt_ev_connect_failed ev;
- struct pending_cmd *power_off;
- power_off = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev);
- if (power_off) {
- struct mgmt_mode *cp = power_off->param;
-
- /* The connection is still in hci_conn_hash so test for 1
- * instead of 0 to know if this is the last one.
- */
- if (!cp->val && hci_conn_count(hdev) == 1) {
- cancel_delayed_work(&hdev->power_off);
- queue_work(hdev->req_workqueue, &hdev->power_off.work);
- }
+ /* The connection is still in hci_conn_hash so test for 1
+ * instead of 0 to know if this is the last one.
+ */
+ if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) {
+ cancel_delayed_work(&hdev->power_off);
+ queue_work(hdev->req_workqueue, &hdev->power_off.work);
}
bacpy(&ev.addr.bdaddr, bdaddr);
@@ -5668,16 +6485,23 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
return mgmt_event(MGMT_EV_PASSKEY_NOTIFY, hdev, &ev, sizeof(ev), NULL);
}
-void mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
- u8 addr_type, u8 status)
+void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status)
{
struct mgmt_ev_auth_failed ev;
+ struct pending_cmd *cmd;
+ u8 status = mgmt_status(hci_status);
- bacpy(&ev.addr.bdaddr, bdaddr);
- ev.addr.type = link_to_bdaddr(link_type, addr_type);
- ev.status = mgmt_status(status);
+ bacpy(&ev.addr.bdaddr, &conn->dst);
+ ev.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
+ ev.status = status;
+
+ cmd = find_pairing(conn);
+
+ mgmt_event(MGMT_EV_AUTH_FAILED, conn->hdev, &ev, sizeof(ev),
+ cmd ? cmd->sk : NULL);
- mgmt_event(MGMT_EV_AUTH_FAILED, hdev, &ev, sizeof(ev), NULL);
+ if (cmd)
+ pairing_complete(cmd, status);
}
void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
@@ -5765,10 +6589,14 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
hci_req_init(&req, hdev);
- if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
+ if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+ if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags))
+ hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE,
+ sizeof(enable), &enable);
update_eir(&req);
- else
+ } else {
clear_eir(&req);
+ }
hci_req_run(&req, NULL);
}
@@ -5912,17 +6740,23 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
}
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
- u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name,
- u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
- u8 scan_rsp_len)
+ u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
+ u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len)
{
char buf[512];
struct mgmt_ev_device_found *ev = (void *) buf;
- struct smp_irk *irk;
size_t ev_size;
- if (!hci_discovery_active(hdev))
- return;
+ /* Don't send events for a non-kernel initiated discovery. With
+ * LE one exception is if we have pend_le_reports > 0 in which
+ * case we're doing passive scanning and want these events.
+ */
+ if (!hci_discovery_active(hdev)) {
+ if (link_type == ACL_LINK)
+ return;
+ if (link_type == LE_LINK && list_empty(&hdev->pend_le_reports))
+ return;
+ }
/* Make sure that the buffer is big enough. The 5 extra bytes
* are for the potential CoD field.
@@ -5932,20 +6766,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
memset(buf, 0, sizeof(buf));
- irk = hci_get_irk(hdev, bdaddr, addr_type);
- if (irk) {
- bacpy(&ev->addr.bdaddr, &irk->bdaddr);
- ev->addr.type = link_to_bdaddr(link_type, irk->addr_type);
- } else {
- bacpy(&ev->addr.bdaddr, bdaddr);
- ev->addr.type = link_to_bdaddr(link_type, addr_type);
- }
-
+ bacpy(&ev->addr.bdaddr, bdaddr);
+ ev->addr.type = link_to_bdaddr(link_type, addr_type);
ev->rssi = rssi;
- if (cfm_name)
- ev->flags |= cpu_to_le32(MGMT_DEV_FOUND_CONFIRM_NAME);
- if (!ssp)
- ev->flags |= cpu_to_le32(MGMT_DEV_FOUND_LEGACY_PAIRING);
+ ev->flags = cpu_to_le32(flags);
if (eir_len > 0)
memcpy(ev->eir, eir, eir_len);
@@ -6013,63 +6837,19 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering)
mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL);
}
-int mgmt_device_blocked(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
-{
- struct pending_cmd *cmd;
- struct mgmt_ev_device_blocked ev;
-
- cmd = mgmt_pending_find(MGMT_OP_BLOCK_DEVICE, hdev);
-
- bacpy(&ev.addr.bdaddr, bdaddr);
- ev.addr.type = type;
-
- return mgmt_event(MGMT_EV_DEVICE_BLOCKED, hdev, &ev, sizeof(ev),
- cmd ? cmd->sk : NULL);
-}
-
-int mgmt_device_unblocked(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
-{
- struct pending_cmd *cmd;
- struct mgmt_ev_device_unblocked ev;
-
- cmd = mgmt_pending_find(MGMT_OP_UNBLOCK_DEVICE, hdev);
-
- bacpy(&ev.addr.bdaddr, bdaddr);
- ev.addr.type = type;
-
- return mgmt_event(MGMT_EV_DEVICE_UNBLOCKED, hdev, &ev, sizeof(ev),
- cmd ? cmd->sk : NULL);
-}
-
static void adv_enable_complete(struct hci_dev *hdev, u8 status)
{
BT_DBG("%s status %u", hdev->name, status);
-
- /* Clear the advertising mgmt setting if we failed to re-enable it */
- if (status) {
- clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
- new_settings(hdev, NULL);
- }
}
void mgmt_reenable_advertising(struct hci_dev *hdev)
{
struct hci_request req;
- if (hci_conn_num(hdev, LE_LINK) > 0)
- return;
-
if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags))
return;
hci_req_init(&req, hdev);
enable_advertising(&req);
-
- /* If this fails we have no option but to let user space know
- * that we've disabled advertising.
- */
- if (hci_req_run(&req, adv_enable_complete) < 0) {
- clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
- new_settings(hdev, NULL);
- }
+ hci_req_run(&req, adv_enable_complete);
}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 754b6fe4f742..af73bc3acb40 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -227,7 +227,8 @@ static int rfcomm_check_security(struct rfcomm_dlc *d)
break;
}
- return hci_conn_security(conn->hcon, d->sec_level, auth_type);
+ return hci_conn_security(conn->hcon, d->sec_level, auth_type,
+ d->out);
}
static void rfcomm_session_timeout(unsigned long arg)
@@ -1909,10 +1910,13 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
/* Get data directly from socket receive queue without copying it. */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
- if (!skb_linearize(skb))
+ if (!skb_linearize(skb)) {
s = rfcomm_recv_frame(s, skb);
- else
+ if (!s)
+ break;
+ } else {
kfree_skb(skb);
+ }
}
if (s && (sk->sk_state == BT_CLOSED))
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c603a5eb4720..8bbbb5ec468c 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -918,7 +918,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how)
sk->sk_shutdown = SHUTDOWN_MASK;
__rfcomm_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
}
release_sock(sk);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index c06dbd3938e8..7ee9e4ab00f8 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -40,13 +40,38 @@ static struct bt_sock_list sco_sk_list = {
.lock = __RW_LOCK_UNLOCKED(sco_sk_list.lock)
};
-static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent);
-static void sco_chan_del(struct sock *sk, int err);
+/* ---- SCO connections ---- */
+struct sco_conn {
+ struct hci_conn *hcon;
+
+ spinlock_t lock;
+ struct sock *sk;
+
+ unsigned int mtu;
+};
+
+#define sco_conn_lock(c) spin_lock(&c->lock);
+#define sco_conn_unlock(c) spin_unlock(&c->lock);
static void sco_sock_close(struct sock *sk);
static void sco_sock_kill(struct sock *sk);
+/* ----- SCO socket info ----- */
+#define sco_pi(sk) ((struct sco_pinfo *) sk)
+
+struct sco_pinfo {
+ struct bt_sock bt;
+ bdaddr_t src;
+ bdaddr_t dst;
+ __u32 flags;
+ __u16 setting;
+ struct sco_conn *conn;
+};
+
/* ---- SCO timers ---- */
+#define SCO_CONN_TIMEOUT (HZ * 40)
+#define SCO_DISCONN_TIMEOUT (HZ * 2)
+
static void sco_sock_timeout(unsigned long arg)
{
struct sock *sk = (struct sock *) arg;
@@ -102,13 +127,31 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
return conn;
}
-static struct sock *sco_chan_get(struct sco_conn *conn)
+/* Delete channel.
+ * Must be called on the locked socket. */
+static void sco_chan_del(struct sock *sk, int err)
{
- struct sock *sk = NULL;
- sco_conn_lock(conn);
- sk = conn->sk;
- sco_conn_unlock(conn);
- return sk;
+ struct sco_conn *conn;
+
+ conn = sco_pi(sk)->conn;
+
+ BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
+
+ if (conn) {
+ sco_conn_lock(conn);
+ conn->sk = NULL;
+ sco_pi(sk)->conn = NULL;
+ sco_conn_unlock(conn);
+
+ if (conn->hcon)
+ hci_conn_drop(conn->hcon);
+ }
+
+ sk->sk_state = BT_CLOSED;
+ sk->sk_err = err;
+ sk->sk_state_change(sk);
+
+ sock_set_flag(sk, SOCK_ZAPPED);
}
static int sco_conn_del(struct hci_conn *hcon, int err)
@@ -122,7 +165,10 @@ static int sco_conn_del(struct hci_conn *hcon, int err)
BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
/* Kill socket */
- sk = sco_chan_get(conn);
+ sco_conn_lock(conn);
+ sk = conn->sk;
+ sco_conn_unlock(conn);
+
if (sk) {
bh_lock_sock(sk);
sco_sock_clear_timer(sk);
@@ -136,6 +182,17 @@ static int sco_conn_del(struct hci_conn *hcon, int err)
return 0;
}
+static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent)
+{
+ BT_DBG("conn %p", conn);
+
+ sco_pi(sk)->conn = conn;
+ conn->sk = sk;
+
+ if (parent)
+ bt_accept_enqueue(parent, sk);
+}
+
static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
struct sock *parent)
{
@@ -240,7 +297,11 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
{
- struct sock *sk = sco_chan_get(conn);
+ struct sock *sk;
+
+ sco_conn_lock(conn);
+ sk = conn->sk;
+ sco_conn_unlock(conn);
if (!sk)
goto drop;
@@ -909,7 +970,8 @@ static int sco_sock_shutdown(struct socket *sock, int how)
sco_sock_clear_timer(sk);
__sco_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
err = bt_sock_wait_state(sk, BT_CLOSED,
sk->sk_lingertime);
}
@@ -929,7 +991,8 @@ static int sco_sock_release(struct socket *sock)
sco_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) {
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
release_sock(sk);
@@ -940,44 +1003,6 @@ static int sco_sock_release(struct socket *sock)
return err;
}
-static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent)
-{
- BT_DBG("conn %p", conn);
-
- sco_pi(sk)->conn = conn;
- conn->sk = sk;
-
- if (parent)
- bt_accept_enqueue(parent, sk);
-}
-
-/* Delete channel.
- * Must be called on the locked socket. */
-static void sco_chan_del(struct sock *sk, int err)
-{
- struct sco_conn *conn;
-
- conn = sco_pi(sk)->conn;
-
- BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
-
- if (conn) {
- sco_conn_lock(conn);
- conn->sk = NULL;
- sco_pi(sk)->conn = NULL;
- sco_conn_unlock(conn);
-
- if (conn->hcon)
- hci_conn_drop(conn->hcon);
- }
-
- sk->sk_state = BT_CLOSED;
- sk->sk_err = err;
- sk->sk_state_change(sk);
-
- sock_set_flag(sk, SOCK_ZAPPED);
-}
-
static void sco_conn_ready(struct sco_conn *conn)
{
struct sock *parent;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index e33a982161c1..f09b6b65cf6b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -31,18 +31,26 @@
#include "smp.h"
+#define SMP_ALLOW_CMD(smp, code) set_bit(code, &smp->allow_cmd)
+
#define SMP_TIMEOUT msecs_to_jiffies(30000)
#define AUTH_REQ_MASK 0x07
-
-#define SMP_FLAG_TK_VALID 1
-#define SMP_FLAG_CFM_PENDING 2
-#define SMP_FLAG_MITM_AUTH 3
-#define SMP_FLAG_COMPLETE 4
-#define SMP_FLAG_INITIATOR 5
+#define KEY_DIST_MASK 0x07
+
+enum {
+ SMP_FLAG_TK_VALID,
+ SMP_FLAG_CFM_PENDING,
+ SMP_FLAG_MITM_AUTH,
+ SMP_FLAG_COMPLETE,
+ SMP_FLAG_INITIATOR,
+};
struct smp_chan {
- struct l2cap_conn *conn;
+ struct l2cap_conn *conn;
+ struct delayed_work security_timer;
+ unsigned long allow_cmd; /* Bitmask of allowed commands */
+
u8 preq[7]; /* SMP Pairing Request */
u8 prsp[7]; /* SMP Pairing Response */
u8 prnd[16]; /* SMP Pairing Random (local) */
@@ -60,20 +68,16 @@ struct smp_chan {
struct smp_ltk *slave_ltk;
struct smp_irk *remote_irk;
unsigned long flags;
+
+ struct crypto_blkcipher *tfm_aes;
};
-static inline void swap128(const u8 src[16], u8 dst[16])
+static inline void swap_buf(const u8 *src, u8 *dst, size_t len)
{
- int i;
- for (i = 0; i < 16; i++)
- dst[15 - i] = src[i];
-}
+ size_t i;
-static inline void swap56(const u8 src[7], u8 dst[7])
-{
- int i;
- for (i = 0; i < 7; i++)
- dst[6 - i] = src[i];
+ for (i = 0; i < len; i++)
+ dst[len - 1 - i] = src[i];
}
static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
@@ -92,7 +96,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
desc.flags = 0;
/* The most significant octet of key corresponds to k[0] */
- swap128(k, tmp);
+ swap_buf(k, tmp, 16);
err = crypto_blkcipher_setkey(tfm, tmp, 16);
if (err) {
@@ -101,7 +105,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
}
/* Most significant octet of plaintextData corresponds to data[0] */
- swap128(r, data);
+ swap_buf(r, data, 16);
sg_init_one(&sg, data, 16);
@@ -110,7 +114,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
BT_ERR("Encrypt data error %d", err);
/* Most significant octet of encryptedData corresponds to data[0] */
- swap128(data, r);
+ swap_buf(data, r, 16);
return err;
}
@@ -141,12 +145,18 @@ static int smp_ah(struct crypto_blkcipher *tfm, u8 irk[16], u8 r[3], u8 res[3])
return 0;
}
-bool smp_irk_matches(struct crypto_blkcipher *tfm, u8 irk[16],
- bdaddr_t *bdaddr)
+bool smp_irk_matches(struct hci_dev *hdev, u8 irk[16], bdaddr_t *bdaddr)
{
+ struct l2cap_chan *chan = hdev->smp_data;
+ struct crypto_blkcipher *tfm;
u8 hash[3];
int err;
+ if (!chan || !chan->data)
+ return false;
+
+ tfm = chan->data;
+
BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
err = smp_ah(tfm, irk, &bdaddr->b[3], hash);
@@ -156,10 +166,17 @@ bool smp_irk_matches(struct crypto_blkcipher *tfm, u8 irk[16],
return !memcmp(bdaddr->b, hash, 3);
}
-int smp_generate_rpa(struct crypto_blkcipher *tfm, u8 irk[16], bdaddr_t *rpa)
+int smp_generate_rpa(struct hci_dev *hdev, u8 irk[16], bdaddr_t *rpa)
{
+ struct l2cap_chan *chan = hdev->smp_data;
+ struct crypto_blkcipher *tfm;
int err;
+ if (!chan || !chan->data)
+ return -EOPNOTSUPP;
+
+ tfm = chan->data;
+
get_random_bytes(&rpa->b[3], 3);
rpa->b[5] &= 0x3f; /* Clear two most significant bits */
@@ -174,13 +191,16 @@ int smp_generate_rpa(struct crypto_blkcipher *tfm, u8 irk[16], bdaddr_t *rpa)
return 0;
}
-static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16],
- u8 preq[7], u8 pres[7], u8 _iat, bdaddr_t *ia,
- u8 _rat, bdaddr_t *ra, u8 res[16])
+static int smp_c1(struct smp_chan *smp, u8 k[16], u8 r[16], u8 preq[7],
+ u8 pres[7], u8 _iat, bdaddr_t *ia, u8 _rat, bdaddr_t *ra,
+ u8 res[16])
{
+ struct hci_dev *hdev = smp->conn->hcon->hdev;
u8 p1[16], p2[16];
int err;
+ BT_DBG("%s", hdev->name);
+
memset(p1, 0, 16);
/* p1 = pres || preq || _rat || _iat */
@@ -198,7 +218,7 @@ static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16],
u128_xor((u128 *) res, (u128 *) r, (u128 *) p1);
/* res = e(k, res) */
- err = smp_e(tfm, k, res);
+ err = smp_e(smp->tfm_aes, k, res);
if (err) {
BT_ERR("Encrypt data error");
return err;
@@ -208,70 +228,64 @@ static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16],
u128_xor((u128 *) res, (u128 *) res, (u128 *) p2);
/* res = e(k, res) */
- err = smp_e(tfm, k, res);
+ err = smp_e(smp->tfm_aes, k, res);
if (err)
BT_ERR("Encrypt data error");
return err;
}
-static int smp_s1(struct crypto_blkcipher *tfm, u8 k[16], u8 r1[16],
- u8 r2[16], u8 _r[16])
+static int smp_s1(struct smp_chan *smp, u8 k[16], u8 r1[16], u8 r2[16],
+ u8 _r[16])
{
+ struct hci_dev *hdev = smp->conn->hcon->hdev;
int err;
+ BT_DBG("%s", hdev->name);
+
/* Just least significant octets from r1 and r2 are considered */
memcpy(_r, r2, 8);
memcpy(_r + 8, r1, 8);
- err = smp_e(tfm, k, _r);
+ err = smp_e(smp->tfm_aes, k, _r);
if (err)
BT_ERR("Encrypt data error");
return err;
}
-static struct sk_buff *smp_build_cmd(struct l2cap_conn *conn, u8 code,
- u16 dlen, void *data)
+static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
{
- struct sk_buff *skb;
- struct l2cap_hdr *lh;
- int len;
-
- len = L2CAP_HDR_SIZE + sizeof(code) + dlen;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp;
+ struct kvec iv[2];
+ struct msghdr msg;
- if (len > conn->mtu)
- return NULL;
+ if (!chan)
+ return;
- skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (!skb)
- return NULL;
+ BT_DBG("code 0x%2.2x", code);
- lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
- lh->len = cpu_to_le16(sizeof(code) + dlen);
- lh->cid = cpu_to_le16(L2CAP_CID_SMP);
+ iv[0].iov_base = &code;
+ iv[0].iov_len = 1;
- memcpy(skb_put(skb, sizeof(code)), &code, sizeof(code));
+ iv[1].iov_base = data;
+ iv[1].iov_len = len;
- memcpy(skb_put(skb, dlen), data, dlen);
+ memset(&msg, 0, sizeof(msg));
- return skb;
-}
+ msg.msg_iov = (struct iovec *) &iv;
+ msg.msg_iovlen = 2;
-static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
-{
- struct sk_buff *skb = smp_build_cmd(conn, code, len, data);
-
- BT_DBG("code 0x%2.2x", code);
+ l2cap_chan_send(chan, &msg, 1 + len);
- if (!skb)
+ if (!chan->data)
return;
- skb->priority = HCI_PRIO_MAX;
- hci_send_acl(conn->hchan, skb, 0);
+ smp = chan->data;
- cancel_delayed_work_sync(&conn->security_timer);
- schedule_delayed_work(&conn->security_timer, SMP_TIMEOUT);
+ cancel_delayed_work_sync(&smp->security_timer);
+ schedule_delayed_work(&smp->security_timer, SMP_TIMEOUT);
}
static __u8 authreq_to_seclevel(__u8 authreq)
@@ -298,12 +312,13 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
struct smp_cmd_pairing *req,
struct smp_cmd_pairing *rsp, __u8 authreq)
{
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
struct hci_conn *hcon = conn->hcon;
struct hci_dev *hdev = hcon->hdev;
u8 local_dist = 0, remote_dist = 0;
- if (test_bit(HCI_PAIRABLE, &conn->hcon->hdev->dev_flags)) {
+ if (test_bit(HCI_BONDABLE, &conn->hcon->hdev->dev_flags)) {
local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
authreq |= SMP_AUTH_BONDING;
@@ -341,7 +356,8 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
{
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
if ((max_key_size > SMP_MAX_ENC_KEY_SIZE) ||
(max_key_size < SMP_MIN_ENC_KEY_SIZE))
@@ -352,21 +368,60 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
return 0;
}
+static void smp_chan_destroy(struct l2cap_conn *conn)
+{
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
+ bool complete;
+
+ BUG_ON(!smp);
+
+ cancel_delayed_work_sync(&smp->security_timer);
+
+ complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags);
+ mgmt_smp_complete(conn->hcon, complete);
+
+ kfree(smp->csrk);
+ kfree(smp->slave_csrk);
+
+ crypto_free_blkcipher(smp->tfm_aes);
+
+ /* If pairing failed clean up any keys we might have */
+ if (!complete) {
+ if (smp->ltk) {
+ list_del(&smp->ltk->list);
+ kfree(smp->ltk);
+ }
+
+ if (smp->slave_ltk) {
+ list_del(&smp->slave_ltk->list);
+ kfree(smp->slave_ltk);
+ }
+
+ if (smp->remote_irk) {
+ list_del(&smp->remote_irk->list);
+ kfree(smp->remote_irk);
+ }
+ }
+
+ chan->data = NULL;
+ kfree(smp);
+ hci_conn_drop(conn->hcon);
+}
+
static void smp_failure(struct l2cap_conn *conn, u8 reason)
{
struct hci_conn *hcon = conn->hcon;
+ struct l2cap_chan *chan = conn->smp;
if (reason)
smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason),
&reason);
clear_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags);
- mgmt_auth_failed(hcon->hdev, &hcon->dst, hcon->type, hcon->dst_type,
- HCI_ERROR_AUTH_FAILURE);
-
- cancel_delayed_work_sync(&conn->security_timer);
+ mgmt_auth_failed(hcon, HCI_ERROR_AUTH_FAILURE);
- if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
+ if (chan->data)
smp_chan_destroy(conn);
}
@@ -387,10 +442,12 @@ static const u8 gen_method[5][5] = {
static u8 get_auth_method(struct smp_chan *smp, u8 local_io, u8 remote_io)
{
- /* If either side has unknown io_caps, use JUST WORKS */
+ /* If either side has unknown io_caps, use JUST_CFM (which gets
+ * converted later to JUST_WORKS if we're initiators.
+ */
if (local_io > SMP_IO_KEYBOARD_DISPLAY ||
remote_io > SMP_IO_KEYBOARD_DISPLAY)
- return JUST_WORKS;
+ return JUST_CFM;
return gen_method[remote_io][local_io];
}
@@ -399,7 +456,8 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
u8 local_io, u8 remote_io)
{
struct hci_conn *hcon = conn->hcon;
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
u8 method;
u32 passkey = 0;
int ret = 0;
@@ -410,21 +468,25 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io);
- /* If neither side wants MITM, use JUST WORKS */
- /* Otherwise, look up method from the table */
+ /* If neither side wants MITM, either "just" confirm an incoming
+ * request or use just-works for outgoing ones. The JUST_CFM
+ * will be converted to JUST_WORKS if necessary later in this
+ * function. If either side has MITM look up the method from the
+ * table.
+ */
if (!(auth & SMP_AUTH_MITM))
- method = JUST_WORKS;
+ method = JUST_CFM;
else
method = get_auth_method(smp, local_io, remote_io);
- /* If not bonding, don't ask user to confirm a Zero TK */
- if (!(auth & SMP_AUTH_BONDING) && method == JUST_CFM)
- method = JUST_WORKS;
-
/* Don't confirm locally initiated pairing attempts */
if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, &smp->flags))
method = JUST_WORKS;
+ /* Don't bother user space with no IO capabilities */
+ if (method == JUST_CFM && hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT)
+ method = JUST_WORKS;
+
/* If Just Works, Continue with Zero TK */
if (method == JUST_WORKS) {
set_bit(SMP_FLAG_TK_VALID, &smp->flags);
@@ -432,14 +494,17 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
}
/* Not Just Works/Confirm results in MITM Authentication */
- if (method != JUST_CFM)
+ if (method != JUST_CFM) {
set_bit(SMP_FLAG_MITM_AUTH, &smp->flags);
+ if (hcon->pending_sec_level < BT_SECURITY_HIGH)
+ hcon->pending_sec_level = BT_SECURITY_HIGH;
+ }
/* If both devices have Keyoard-Display I/O, the master
* Confirms and the slave Enters the passkey.
*/
if (method == OVERLAP) {
- if (hcon->link_mode & HCI_LM_MASTER)
+ if (hcon->role == HCI_ROLE_MASTER)
method = CFM_PASSKEY;
else
method = REQ_PASSKEY;
@@ -477,23 +542,15 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
static u8 smp_confirm(struct smp_chan *smp)
{
struct l2cap_conn *conn = smp->conn;
- struct hci_dev *hdev = conn->hcon->hdev;
- struct crypto_blkcipher *tfm = hdev->tfm_aes;
struct smp_cmd_pairing_confirm cp;
int ret;
BT_DBG("conn %p", conn);
- /* Prevent mutual access to hdev->tfm_aes */
- hci_dev_lock(hdev);
-
- ret = smp_c1(tfm, smp->tk, smp->prnd, smp->preq, smp->prsp,
+ ret = smp_c1(smp, smp->tk, smp->prnd, smp->preq, smp->prsp,
conn->hcon->init_addr_type, &conn->hcon->init_addr,
conn->hcon->resp_addr_type, &conn->hcon->resp_addr,
cp.confirm_val);
-
- hci_dev_unlock(hdev);
-
if (ret)
return SMP_UNSPECIFIED;
@@ -501,6 +558,11 @@ static u8 smp_confirm(struct smp_chan *smp)
smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp);
+ if (conn->hcon->out)
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM);
+ else
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM);
+
return 0;
}
@@ -508,25 +570,17 @@ static u8 smp_random(struct smp_chan *smp)
{
struct l2cap_conn *conn = smp->conn;
struct hci_conn *hcon = conn->hcon;
- struct hci_dev *hdev = hcon->hdev;
- struct crypto_blkcipher *tfm = hdev->tfm_aes;
u8 confirm[16];
int ret;
- if (IS_ERR_OR_NULL(tfm))
+ if (IS_ERR_OR_NULL(smp->tfm_aes))
return SMP_UNSPECIFIED;
BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
- /* Prevent mutual access to hdev->tfm_aes */
- hci_dev_lock(hdev);
-
- ret = smp_c1(tfm, smp->tk, smp->rrnd, smp->preq, smp->prsp,
+ ret = smp_c1(smp, smp->tk, smp->rrnd, smp->preq, smp->prsp,
hcon->init_addr_type, &hcon->init_addr,
hcon->resp_addr_type, &hcon->resp_addr, confirm);
-
- hci_dev_unlock(hdev);
-
if (ret)
return SMP_UNSPECIFIED;
@@ -540,7 +594,7 @@ static u8 smp_random(struct smp_chan *smp)
__le64 rand = 0;
__le16 ediv = 0;
- smp_s1(tfm, smp->tk, smp->rrnd, smp->prnd, stk);
+ smp_s1(smp, smp->tk, smp->rrnd, smp->prnd, stk);
memset(stk + smp->enc_key_size, 0,
SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
@@ -550,6 +604,7 @@ static u8 smp_random(struct smp_chan *smp)
hci_le_start_enc(hcon, ediv, rand, stk);
hcon->enc_key_size = smp->enc_key_size;
+ set_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags);
} else {
u8 stk[16], auth;
__le64 rand = 0;
@@ -558,7 +613,7 @@ static u8 smp_random(struct smp_chan *smp)
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
smp->prnd);
- smp_s1(tfm, smp->tk, smp->prnd, smp->rrnd, stk);
+ smp_s1(smp, smp->tk, smp->prnd, smp->rrnd, stk);
memset(stk + smp->enc_key_size, 0,
SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
@@ -568,80 +623,273 @@ static u8 smp_random(struct smp_chan *smp)
else
auth = 0;
+ /* Even though there's no _SLAVE suffix this is the
+ * slave STK we're adding for later lookup (the master
+ * STK never needs to be stored).
+ */
hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
- HCI_SMP_STK_SLAVE, auth, stk, smp->enc_key_size,
- ediv, rand);
+ SMP_STK, auth, stk, smp->enc_key_size, ediv, rand);
}
return 0;
}
-static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
+static void smp_notify_keys(struct l2cap_conn *conn)
{
- struct smp_chan *smp;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
+ struct hci_conn *hcon = conn->hcon;
+ struct hci_dev *hdev = hcon->hdev;
+ struct smp_cmd_pairing *req = (void *) &smp->preq[1];
+ struct smp_cmd_pairing *rsp = (void *) &smp->prsp[1];
+ bool persistent;
- smp = kzalloc(sizeof(*smp), GFP_ATOMIC);
- if (!smp)
- return NULL;
+ if (smp->remote_irk) {
+ mgmt_new_irk(hdev, smp->remote_irk);
+ /* Now that user space can be considered to know the
+ * identity address track the connection based on it
+ * from now on.
+ */
+ bacpy(&hcon->dst, &smp->remote_irk->bdaddr);
+ hcon->dst_type = smp->remote_irk->addr_type;
+ queue_work(hdev->workqueue, &conn->id_addr_update_work);
- smp->conn = conn;
- conn->smp_chan = smp;
- conn->hcon->smp_conn = conn;
+ /* When receiving an indentity resolving key for
+ * a remote device that does not use a resolvable
+ * private address, just remove the key so that
+ * it is possible to use the controller white
+ * list for scanning.
+ *
+ * Userspace will have been told to not store
+ * this key at this point. So it is safe to
+ * just remove it.
+ */
+ if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
+ list_del(&smp->remote_irk->list);
+ kfree(smp->remote_irk);
+ smp->remote_irk = NULL;
+ }
+ }
- hci_conn_hold(conn->hcon);
+ /* The LTKs and CSRKs should be persistent only if both sides
+ * had the bonding bit set in their authentication requests.
+ */
+ persistent = !!((req->auth_req & rsp->auth_req) & SMP_AUTH_BONDING);
- return smp;
+ if (smp->csrk) {
+ smp->csrk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->csrk->bdaddr, &hcon->dst);
+ mgmt_new_csrk(hdev, smp->csrk, persistent);
+ }
+
+ if (smp->slave_csrk) {
+ smp->slave_csrk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->slave_csrk->bdaddr, &hcon->dst);
+ mgmt_new_csrk(hdev, smp->slave_csrk, persistent);
+ }
+
+ if (smp->ltk) {
+ smp->ltk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->ltk->bdaddr, &hcon->dst);
+ mgmt_new_ltk(hdev, smp->ltk, persistent);
+ }
+
+ if (smp->slave_ltk) {
+ smp->slave_ltk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->slave_ltk->bdaddr, &hcon->dst);
+ mgmt_new_ltk(hdev, smp->slave_ltk, persistent);
+ }
}
-void smp_chan_destroy(struct l2cap_conn *conn)
+static void smp_allow_key_dist(struct smp_chan *smp)
{
- struct smp_chan *smp = conn->smp_chan;
- bool complete;
+ /* Allow the first expected phase 3 PDU. The rest of the PDUs
+ * will be allowed in each PDU handler to ensure we receive
+ * them in the correct order.
+ */
+ if (smp->remote_key_dist & SMP_DIST_ENC_KEY)
+ SMP_ALLOW_CMD(smp, SMP_CMD_ENCRYPT_INFO);
+ else if (smp->remote_key_dist & SMP_DIST_ID_KEY)
+ SMP_ALLOW_CMD(smp, SMP_CMD_IDENT_INFO);
+ else if (smp->remote_key_dist & SMP_DIST_SIGN)
+ SMP_ALLOW_CMD(smp, SMP_CMD_SIGN_INFO);
+}
- BUG_ON(!smp);
+static void smp_distribute_keys(struct smp_chan *smp)
+{
+ struct smp_cmd_pairing *req, *rsp;
+ struct l2cap_conn *conn = smp->conn;
+ struct hci_conn *hcon = conn->hcon;
+ struct hci_dev *hdev = hcon->hdev;
+ __u8 *keydist;
- complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags);
- mgmt_smp_complete(conn->hcon, complete);
+ BT_DBG("conn %p", conn);
- kfree(smp->csrk);
- kfree(smp->slave_csrk);
+ rsp = (void *) &smp->prsp[1];
- /* If pairing failed clean up any keys we might have */
- if (!complete) {
- if (smp->ltk) {
- list_del(&smp->ltk->list);
- kfree(smp->ltk);
- }
+ /* The responder sends its keys first */
+ if (hcon->out && (smp->remote_key_dist & KEY_DIST_MASK)) {
+ smp_allow_key_dist(smp);
+ return;
+ }
- if (smp->slave_ltk) {
- list_del(&smp->slave_ltk->list);
- kfree(smp->slave_ltk);
- }
+ req = (void *) &smp->preq[1];
- if (smp->remote_irk) {
- list_del(&smp->remote_irk->list);
- kfree(smp->remote_irk);
+ if (hcon->out) {
+ keydist = &rsp->init_key_dist;
+ *keydist &= req->init_key_dist;
+ } else {
+ keydist = &rsp->resp_key_dist;
+ *keydist &= req->resp_key_dist;
+ }
+
+ BT_DBG("keydist 0x%x", *keydist);
+
+ if (*keydist & SMP_DIST_ENC_KEY) {
+ struct smp_cmd_encrypt_info enc;
+ struct smp_cmd_master_ident ident;
+ struct smp_ltk *ltk;
+ u8 authenticated;
+ __le16 ediv;
+ __le64 rand;
+
+ get_random_bytes(enc.ltk, sizeof(enc.ltk));
+ get_random_bytes(&ediv, sizeof(ediv));
+ get_random_bytes(&rand, sizeof(rand));
+
+ smp_send_cmd(conn, SMP_CMD_ENCRYPT_INFO, sizeof(enc), &enc);
+
+ authenticated = hcon->sec_level == BT_SECURITY_HIGH;
+ ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type,
+ SMP_LTK_SLAVE, authenticated, enc.ltk,
+ smp->enc_key_size, ediv, rand);
+ smp->slave_ltk = ltk;
+
+ ident.ediv = ediv;
+ ident.rand = rand;
+
+ smp_send_cmd(conn, SMP_CMD_MASTER_IDENT, sizeof(ident), &ident);
+
+ *keydist &= ~SMP_DIST_ENC_KEY;
+ }
+
+ if (*keydist & SMP_DIST_ID_KEY) {
+ struct smp_cmd_ident_addr_info addrinfo;
+ struct smp_cmd_ident_info idinfo;
+
+ memcpy(idinfo.irk, hdev->irk, sizeof(idinfo.irk));
+
+ smp_send_cmd(conn, SMP_CMD_IDENT_INFO, sizeof(idinfo), &idinfo);
+
+ /* The hci_conn contains the local identity address
+ * after the connection has been established.
+ *
+ * This is true even when the connection has been
+ * established using a resolvable random address.
+ */
+ bacpy(&addrinfo.bdaddr, &hcon->src);
+ addrinfo.addr_type = hcon->src_type;
+
+ smp_send_cmd(conn, SMP_CMD_IDENT_ADDR_INFO, sizeof(addrinfo),
+ &addrinfo);
+
+ *keydist &= ~SMP_DIST_ID_KEY;
+ }
+
+ if (*keydist & SMP_DIST_SIGN) {
+ struct smp_cmd_sign_info sign;
+ struct smp_csrk *csrk;
+
+ /* Generate a new random key */
+ get_random_bytes(sign.csrk, sizeof(sign.csrk));
+
+ csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
+ if (csrk) {
+ csrk->master = 0x00;
+ memcpy(csrk->val, sign.csrk, sizeof(csrk->val));
}
+ smp->slave_csrk = csrk;
+
+ smp_send_cmd(conn, SMP_CMD_SIGN_INFO, sizeof(sign), &sign);
+
+ *keydist &= ~SMP_DIST_SIGN;
}
- kfree(smp);
- conn->smp_chan = NULL;
- conn->hcon->smp_conn = NULL;
- hci_conn_drop(conn->hcon);
+ /* If there are still keys to be received wait for them */
+ if (smp->remote_key_dist & KEY_DIST_MASK) {
+ smp_allow_key_dist(smp);
+ return;
+ }
+
+ set_bit(SMP_FLAG_COMPLETE, &smp->flags);
+ smp_notify_keys(conn);
+
+ smp_chan_destroy(conn);
+}
+
+static void smp_timeout(struct work_struct *work)
+{
+ struct smp_chan *smp = container_of(work, struct smp_chan,
+ security_timer.work);
+ struct l2cap_conn *conn = smp->conn;
+
+ BT_DBG("conn %p", conn);
+
+ hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM);
+}
+
+static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
+{
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp;
+
+ smp = kzalloc(sizeof(*smp), GFP_ATOMIC);
+ if (!smp)
+ return NULL;
+
+ smp->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(smp->tfm_aes)) {
+ BT_ERR("Unable to create ECB crypto context");
+ kfree(smp);
+ return NULL;
+ }
+
+ smp->conn = conn;
+ chan->data = smp;
+
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_FAIL);
+
+ INIT_DELAYED_WORK(&smp->security_timer, smp_timeout);
+
+ hci_conn_hold(conn->hcon);
+
+ return smp;
}
int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
{
- struct l2cap_conn *conn = hcon->smp_conn;
+ struct l2cap_conn *conn = hcon->l2cap_data;
+ struct l2cap_chan *chan;
struct smp_chan *smp;
u32 value;
+ int err;
BT_DBG("");
if (!conn)
return -ENOTCONN;
- smp = conn->smp_chan;
+ chan = conn->smp;
+ if (!chan)
+ return -ENOTCONN;
+
+ l2cap_chan_lock(chan);
+ if (!chan->data) {
+ err = -ENOTCONN;
+ goto unlock;
+ }
+
+ smp = chan->data;
switch (mgmt_op) {
case MGMT_OP_USER_PASSKEY_REPLY:
@@ -656,12 +904,16 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
case MGMT_OP_USER_PASSKEY_NEG_REPLY:
case MGMT_OP_USER_CONFIRM_NEG_REPLY:
smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED);
- return 0;
+ err = 0;
+ goto unlock;
default:
smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED);
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto unlock;
}
+ err = 0;
+
/* If it is our turn to send Pairing Confirm, do so now */
if (test_bit(SMP_FLAG_CFM_PENDING, &smp->flags)) {
u8 rsp = smp_confirm(smp);
@@ -669,12 +921,16 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
smp_failure(conn, rsp);
}
- return 0;
+unlock:
+ l2cap_chan_unlock(chan);
+ return err;
}
static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_pairing rsp, *req = (void *) skb->data;
+ struct l2cap_chan *chan = conn->smp;
+ struct hci_dev *hdev = conn->hcon->hdev;
struct smp_chan *smp;
u8 key_size, auth, sec_level;
int ret;
@@ -684,25 +940,33 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*req))
return SMP_INVALID_PARAMS;
- if (conn->hcon->link_mode & HCI_LM_MASTER)
+ if (conn->hcon->role != HCI_ROLE_SLAVE)
return SMP_CMD_NOTSUPP;
- if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags))
+ if (!chan->data)
smp = smp_chan_create(conn);
else
- smp = conn->smp_chan;
+ smp = chan->data;
if (!smp)
return SMP_UNSPECIFIED;
+ /* We didn't start the pairing, so match remote */
+ auth = req->auth_req & AUTH_REQ_MASK;
+
+ if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) &&
+ (auth & SMP_AUTH_BONDING))
+ return SMP_PAIRING_NOTSUPP;
+
smp->preq[0] = SMP_CMD_PAIRING_REQ;
memcpy(&smp->preq[1], req, sizeof(*req));
skb_pull(skb, sizeof(*req));
- /* We didn't start the pairing, so match remote */
- auth = req->auth_req;
+ if (conn->hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT)
+ sec_level = BT_SECURITY_MEDIUM;
+ else
+ sec_level = authreq_to_seclevel(auth);
- sec_level = authreq_to_seclevel(auth);
if (sec_level > conn->hcon->pending_sec_level)
conn->hcon->pending_sec_level = sec_level;
@@ -728,22 +992,22 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
memcpy(&smp->prsp[1], &rsp, sizeof(rsp));
smp_send_cmd(conn, SMP_CMD_PAIRING_RSP, sizeof(rsp), &rsp);
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM);
/* Request setup of TK */
ret = tk_request(conn, 0, auth, rsp.io_capability, req->io_capability);
if (ret)
return SMP_UNSPECIFIED;
- clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
-
return 0;
}
static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_pairing *req, *rsp = (void *) skb->data;
- struct smp_chan *smp = conn->smp_chan;
- u8 key_size, auth = SMP_AUTH_NONE;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
+ u8 key_size, auth;
int ret;
BT_DBG("conn %p", conn);
@@ -751,7 +1015,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*rsp))
return SMP_INVALID_PARAMS;
- if (!(conn->hcon->link_mode & HCI_LM_MASTER))
+ if (conn->hcon->role != HCI_ROLE_MASTER)
return SMP_CMD_NOTSUPP;
skb_pull(skb, sizeof(*rsp));
@@ -762,6 +1026,8 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (check_enc_key_size(conn, key_size))
return SMP_ENC_KEY_SIZE;
+ auth = rsp->auth_req & AUTH_REQ_MASK;
+
/* If we need MITM check that it can be acheived */
if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
u8 method;
@@ -782,11 +1048,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
*/
smp->remote_key_dist &= rsp->resp_key_dist;
- if ((req->auth_req & SMP_AUTH_BONDING) &&
- (rsp->auth_req & SMP_AUTH_BONDING))
- auth = SMP_AUTH_BONDING;
-
- auth |= (req->auth_req | rsp->auth_req) & SMP_AUTH_MITM;
+ auth |= req->auth_req;
ret = tk_request(conn, 0, auth, req->io_capability, rsp->io_capability);
if (ret)
@@ -803,7 +1065,8 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
{
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
@@ -813,10 +1076,14 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf));
skb_pull(skb, sizeof(smp->pcnf));
- if (conn->hcon->out)
+ if (conn->hcon->out) {
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
smp->prnd);
- else if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM);
+ return 0;
+ }
+
+ if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
return smp_confirm(smp);
else
set_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
@@ -826,7 +1093,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
{
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
BT_DBG("conn %p", conn);
@@ -839,26 +1107,51 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
return smp_random(smp);
}
-static u8 smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
+static bool smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
{
struct smp_ltk *key;
struct hci_conn *hcon = conn->hcon;
key = hci_find_ltk_by_addr(hcon->hdev, &hcon->dst, hcon->dst_type,
- hcon->out);
+ hcon->role);
if (!key)
- return 0;
+ return false;
- if (sec_level > BT_SECURITY_MEDIUM && !key->authenticated)
- return 0;
+ if (smp_ltk_sec_level(key) < sec_level)
+ return false;
if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
- return 1;
+ return true;
hci_le_start_enc(hcon, key->ediv, key->rand, key->val);
hcon->enc_key_size = key->enc_size;
- return 1;
+ /* We never store STKs for master role, so clear this flag */
+ clear_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags);
+
+ return true;
+}
+
+bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level)
+{
+ if (sec_level == BT_SECURITY_LOW)
+ return true;
+
+ /* If we're encrypted with an STK always claim insufficient
+ * security. This way we allow the connection to be re-encrypted
+ * with an LTK, even if the LTK provides the same level of
+ * security. Only exception is if we don't have an LTK (e.g.
+ * because of key distribution bits).
+ */
+ if (test_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags) &&
+ hci_find_ltk_by_addr(hcon->hdev, &hcon->dst, hcon->dst_type,
+ hcon->role))
+ return false;
+
+ if (hcon->sec_level >= sec_level)
+ return true;
+
+ return false;
}
static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
@@ -867,59 +1160,61 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
struct smp_cmd_pairing cp;
struct hci_conn *hcon = conn->hcon;
struct smp_chan *smp;
- u8 sec_level;
+ u8 sec_level, auth;
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*rp))
return SMP_INVALID_PARAMS;
- if (!(conn->hcon->link_mode & HCI_LM_MASTER))
+ if (hcon->role != HCI_ROLE_MASTER)
return SMP_CMD_NOTSUPP;
- sec_level = authreq_to_seclevel(rp->auth_req);
+ auth = rp->auth_req & AUTH_REQ_MASK;
+
+ if (hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT)
+ sec_level = BT_SECURITY_MEDIUM;
+ else
+ sec_level = authreq_to_seclevel(auth);
+
+ if (smp_sufficient_security(hcon, sec_level))
+ return 0;
+
if (sec_level > hcon->pending_sec_level)
hcon->pending_sec_level = sec_level;
if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
return 0;
- if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
- return 0;
-
smp = smp_chan_create(conn);
+ if (!smp)
+ return SMP_UNSPECIFIED;
+
+ if (!test_bit(HCI_BONDABLE, &hcon->hdev->dev_flags) &&
+ (auth & SMP_AUTH_BONDING))
+ return SMP_PAIRING_NOTSUPP;
skb_pull(skb, sizeof(*rp));
memset(&cp, 0, sizeof(cp));
- build_pairing_cmd(conn, &cp, NULL, rp->auth_req);
+ build_pairing_cmd(conn, &cp, NULL, auth);
smp->preq[0] = SMP_CMD_PAIRING_REQ;
memcpy(&smp->preq[1], &cp, sizeof(cp));
smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
-
- clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP);
return 0;
}
-bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level)
-{
- if (sec_level == BT_SECURITY_LOW)
- return true;
-
- if (hcon->sec_level >= sec_level)
- return true;
-
- return false;
-}
-
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
{
struct l2cap_conn *conn = hcon->l2cap_data;
+ struct l2cap_chan *chan;
struct smp_chan *smp;
__u8 authreq;
+ int ret;
BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level);
@@ -927,6 +1222,8 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
if (!conn)
return 1;
+ chan = conn->smp;
+
if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
return 1;
@@ -936,16 +1233,23 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
if (sec_level > hcon->pending_sec_level)
hcon->pending_sec_level = sec_level;
- if (hcon->link_mode & HCI_LM_MASTER)
+ if (hcon->role == HCI_ROLE_MASTER)
if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
return 0;
- if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
- return 0;
+ l2cap_chan_lock(chan);
+
+ /* If SMP is already in progress ignore this request */
+ if (chan->data) {
+ ret = 0;
+ goto unlock;
+ }
smp = smp_chan_create(conn);
- if (!smp)
- return 1;
+ if (!smp) {
+ ret = 1;
+ goto unlock;
+ }
authreq = seclevel_to_authreq(sec_level);
@@ -956,7 +1260,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
hcon->pending_sec_level > BT_SECURITY_MEDIUM)
authreq |= SMP_AUTH_MITM;
- if (hcon->link_mode & HCI_LM_MASTER) {
+ if (hcon->role == HCI_ROLE_MASTER) {
struct smp_cmd_pairing cp;
build_pairing_cmd(conn, &cp, NULL, authreq);
@@ -964,30 +1268,34 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
memcpy(&smp->preq[1], &cp, sizeof(cp));
smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP);
} else {
struct smp_cmd_security_req cp;
cp.auth_req = authreq;
smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp);
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ);
}
set_bit(SMP_FLAG_INITIATOR, &smp->flags);
+ ret = 0;
- return 0;
+unlock:
+ l2cap_chan_unlock(chan);
+ return ret;
}
static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_encrypt_info *rp = (void *) skb->data;
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*rp))
return SMP_INVALID_PARAMS;
- /* Ignore this PDU if it wasn't requested */
- if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
- return 0;
+ SMP_ALLOW_CMD(smp, SMP_CMD_MASTER_IDENT);
skb_pull(skb, sizeof(*rp));
@@ -999,7 +1307,8 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_master_ident *rp = (void *) skb->data;
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
struct hci_dev *hdev = conn->hcon->hdev;
struct hci_conn *hcon = conn->hcon;
struct smp_ltk *ltk;
@@ -1010,23 +1319,24 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*rp))
return SMP_INVALID_PARAMS;
- /* Ignore this PDU if it wasn't requested */
- if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
- return 0;
-
/* Mark the information as received */
smp->remote_key_dist &= ~SMP_DIST_ENC_KEY;
+ if (smp->remote_key_dist & SMP_DIST_ID_KEY)
+ SMP_ALLOW_CMD(smp, SMP_CMD_IDENT_INFO);
+ else if (smp->remote_key_dist & SMP_DIST_SIGN)
+ SMP_ALLOW_CMD(smp, SMP_CMD_SIGN_INFO);
+
skb_pull(skb, sizeof(*rp));
hci_dev_lock(hdev);
authenticated = (hcon->sec_level == BT_SECURITY_HIGH);
- ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type, HCI_SMP_LTK,
+ ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type, SMP_LTK,
authenticated, smp->tk, smp->enc_key_size,
rp->ediv, rp->rand);
smp->ltk = ltk;
- if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
- smp_distribute_keys(conn);
+ if (!(smp->remote_key_dist & KEY_DIST_MASK))
+ smp_distribute_keys(smp);
hci_dev_unlock(hdev);
return 0;
@@ -1035,16 +1345,15 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_ident_info *info = (void *) skb->data;
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
BT_DBG("");
if (skb->len < sizeof(*info))
return SMP_INVALID_PARAMS;
- /* Ignore this PDU if it wasn't requested */
- if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
- return 0;
+ SMP_ALLOW_CMD(smp, SMP_CMD_IDENT_ADDR_INFO);
skb_pull(skb, sizeof(*info));
@@ -1057,7 +1366,8 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
struct sk_buff *skb)
{
struct smp_cmd_ident_addr_info *info = (void *) skb->data;
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
struct hci_conn *hcon = conn->hcon;
bdaddr_t rpa;
@@ -1066,15 +1376,16 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
if (skb->len < sizeof(*info))
return SMP_INVALID_PARAMS;
- /* Ignore this PDU if it wasn't requested */
- if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
- return 0;
-
/* Mark the information as received */
smp->remote_key_dist &= ~SMP_DIST_ID_KEY;
+ if (smp->remote_key_dist & SMP_DIST_SIGN)
+ SMP_ALLOW_CMD(smp, SMP_CMD_SIGN_INFO);
+
skb_pull(skb, sizeof(*info));
+ hci_dev_lock(hcon->hdev);
+
/* Strictly speaking the Core Specification (4.1) allows sending
* an empty address which would force us to rely on just the IRK
* as "identity information". However, since such
@@ -1084,8 +1395,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
*/
if (!bacmp(&info->bdaddr, BDADDR_ANY)) {
BT_ERR("Ignoring IRK with no identity address");
- smp_distribute_keys(conn);
- return 0;
+ goto distribute;
}
bacpy(&smp->id_addr, &info->bdaddr);
@@ -1099,7 +1409,11 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
smp->remote_irk = hci_add_irk(conn->hcon->hdev, &smp->id_addr,
smp->id_addr_type, smp->irk, &rpa);
- smp_distribute_keys(conn);
+distribute:
+ if (!(smp->remote_key_dist & KEY_DIST_MASK))
+ smp_distribute_keys(smp);
+
+ hci_dev_unlock(hcon->hdev);
return 0;
}
@@ -1107,7 +1421,8 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_sign_info *rp = (void *) skb->data;
- struct smp_chan *smp = conn->smp_chan;
+ struct l2cap_chan *chan = conn->smp;
+ struct smp_chan *smp = chan->data;
struct hci_dev *hdev = conn->hcon->hdev;
struct smp_csrk *csrk;
@@ -1116,10 +1431,6 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*rp))
return SMP_INVALID_PARAMS;
- /* Ignore this PDU if it wasn't requested */
- if (!(smp->remote_key_dist & SMP_DIST_SIGN))
- return 0;
-
/* Mark the information as received */
smp->remote_key_dist &= ~SMP_DIST_SIGN;
@@ -1132,16 +1443,17 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
memcpy(csrk->val, rp->csrk, sizeof(csrk->val));
}
smp->csrk = csrk;
- if (!(smp->remote_key_dist & SMP_DIST_SIGN))
- smp_distribute_keys(conn);
+ smp_distribute_keys(smp);
hci_dev_unlock(hdev);
return 0;
}
-int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
+static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
{
+ struct l2cap_conn *conn = chan->conn;
struct hci_conn *hcon = conn->hcon;
+ struct smp_chan *smp;
__u8 code, reason;
int err = 0;
@@ -1150,13 +1462,10 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
return 0;
}
- if (skb->len < 1) {
- kfree_skb(skb);
+ if (skb->len < 1)
return -EILSEQ;
- }
if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) {
- err = -ENOTSUPP;
reason = SMP_PAIRING_NOTSUPP;
goto done;
}
@@ -1164,18 +1473,19 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
code = skb->data[0];
skb_pull(skb, sizeof(code));
- /*
- * The SMP context must be initialized for all other PDUs except
- * pairing and security requests. If we get any other PDU when
- * not initialized simply disconnect (done if this function
- * returns an error).
+ smp = chan->data;
+
+ if (code > SMP_CMD_MAX)
+ goto drop;
+
+ if (smp && !test_and_clear_bit(code, &smp->allow_cmd))
+ goto drop;
+
+ /* If we don't have a context the only allowed commands are
+ * pairing request and security request.
*/
- if (code != SMP_CMD_PAIRING_REQ && code != SMP_CMD_SECURITY_REQ &&
- !conn->smp_chan) {
- BT_ERR("Unexpected SMP command 0x%02x. Disconnecting.", code);
- kfree_skb(skb);
- return -ENOTSUPP;
- }
+ if (!smp && code != SMP_CMD_PAIRING_REQ && code != SMP_CMD_SECURITY_REQ)
+ goto drop;
switch (code) {
case SMP_CMD_PAIRING_REQ:
@@ -1184,7 +1494,6 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
case SMP_CMD_PAIRING_FAIL:
smp_failure(conn, 0);
- reason = 0;
err = -EPERM;
break;
@@ -1226,181 +1535,217 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
default:
BT_DBG("Unknown command code 0x%2.2x", code);
-
reason = SMP_CMD_NOTSUPP;
- err = -EOPNOTSUPP;
goto done;
}
done:
- if (reason)
- smp_failure(conn, reason);
+ if (!err) {
+ if (reason)
+ smp_failure(conn, reason);
+ kfree_skb(skb);
+ }
- kfree_skb(skb);
return err;
+
+drop:
+ BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name,
+ code, &hcon->dst);
+ kfree_skb(skb);
+ return 0;
}
-static void smp_notify_keys(struct l2cap_conn *conn)
+static void smp_teardown_cb(struct l2cap_chan *chan, int err)
{
- struct smp_chan *smp = conn->smp_chan;
- struct hci_conn *hcon = conn->hcon;
- struct hci_dev *hdev = hcon->hdev;
- struct smp_cmd_pairing *req = (void *) &smp->preq[1];
- struct smp_cmd_pairing *rsp = (void *) &smp->prsp[1];
- bool persistent;
+ struct l2cap_conn *conn = chan->conn;
- if (smp->remote_irk) {
- mgmt_new_irk(hdev, smp->remote_irk);
- /* Now that user space can be considered to know the
- * identity address track the connection based on it
- * from now on.
- */
- bacpy(&hcon->dst, &smp->remote_irk->bdaddr);
- hcon->dst_type = smp->remote_irk->addr_type;
- l2cap_conn_update_id_addr(hcon);
- }
+ BT_DBG("chan %p", chan);
- /* The LTKs and CSRKs should be persistent only if both sides
- * had the bonding bit set in their authentication requests.
- */
- persistent = !!((req->auth_req & rsp->auth_req) & SMP_AUTH_BONDING);
+ if (chan->data)
+ smp_chan_destroy(conn);
- if (smp->csrk) {
- smp->csrk->bdaddr_type = hcon->dst_type;
- bacpy(&smp->csrk->bdaddr, &hcon->dst);
- mgmt_new_csrk(hdev, smp->csrk, persistent);
- }
+ conn->smp = NULL;
+ l2cap_chan_put(chan);
+}
- if (smp->slave_csrk) {
- smp->slave_csrk->bdaddr_type = hcon->dst_type;
- bacpy(&smp->slave_csrk->bdaddr, &hcon->dst);
- mgmt_new_csrk(hdev, smp->slave_csrk, persistent);
- }
+static void smp_resume_cb(struct l2cap_chan *chan)
+{
+ struct smp_chan *smp = chan->data;
+ struct l2cap_conn *conn = chan->conn;
+ struct hci_conn *hcon = conn->hcon;
- if (smp->ltk) {
- smp->ltk->bdaddr_type = hcon->dst_type;
- bacpy(&smp->ltk->bdaddr, &hcon->dst);
- mgmt_new_ltk(hdev, smp->ltk, persistent);
- }
+ BT_DBG("chan %p", chan);
- if (smp->slave_ltk) {
- smp->slave_ltk->bdaddr_type = hcon->dst_type;
- bacpy(&smp->slave_ltk->bdaddr, &hcon->dst);
- mgmt_new_ltk(hdev, smp->slave_ltk, persistent);
- }
+ if (!smp)
+ return;
+
+ if (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
+ return;
+
+ cancel_delayed_work(&smp->security_timer);
+
+ smp_distribute_keys(smp);
}
-int smp_distribute_keys(struct l2cap_conn *conn)
+static void smp_ready_cb(struct l2cap_chan *chan)
{
- struct smp_cmd_pairing *req, *rsp;
- struct smp_chan *smp = conn->smp_chan;
- struct hci_conn *hcon = conn->hcon;
- struct hci_dev *hdev = hcon->hdev;
- __u8 *keydist;
+ struct l2cap_conn *conn = chan->conn;
- BT_DBG("conn %p", conn);
+ BT_DBG("chan %p", chan);
- if (!test_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
- return 0;
+ conn->smp = chan;
+ l2cap_chan_hold(chan);
+}
- rsp = (void *) &smp->prsp[1];
+static int smp_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
+{
+ int err;
- /* The responder sends its keys first */
- if (hcon->out && (smp->remote_key_dist & 0x07))
- return 0;
+ BT_DBG("chan %p", chan);
- req = (void *) &smp->preq[1];
+ err = smp_sig_channel(chan, skb);
+ if (err) {
+ struct smp_chan *smp = chan->data;
- if (hcon->out) {
- keydist = &rsp->init_key_dist;
- *keydist &= req->init_key_dist;
- } else {
- keydist = &rsp->resp_key_dist;
- *keydist &= req->resp_key_dist;
+ if (smp)
+ cancel_delayed_work_sync(&smp->security_timer);
+
+ hci_disconnect(chan->conn->hcon, HCI_ERROR_AUTH_FAILURE);
}
- BT_DBG("keydist 0x%x", *keydist);
+ return err;
+}
- if (*keydist & SMP_DIST_ENC_KEY) {
- struct smp_cmd_encrypt_info enc;
- struct smp_cmd_master_ident ident;
- struct smp_ltk *ltk;
- u8 authenticated;
- __le16 ediv;
- __le64 rand;
+static struct sk_buff *smp_alloc_skb_cb(struct l2cap_chan *chan,
+ unsigned long hdr_len,
+ unsigned long len, int nb)
+{
+ struct sk_buff *skb;
- get_random_bytes(enc.ltk, sizeof(enc.ltk));
- get_random_bytes(&ediv, sizeof(ediv));
- get_random_bytes(&rand, sizeof(rand));
+ skb = bt_skb_alloc(hdr_len + len, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
- smp_send_cmd(conn, SMP_CMD_ENCRYPT_INFO, sizeof(enc), &enc);
+ skb->priority = HCI_PRIO_MAX;
+ bt_cb(skb)->chan = chan;
- authenticated = hcon->sec_level == BT_SECURITY_HIGH;
- ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type,
- HCI_SMP_LTK_SLAVE, authenticated, enc.ltk,
- smp->enc_key_size, ediv, rand);
- smp->slave_ltk = ltk;
+ return skb;
+}
- ident.ediv = ediv;
- ident.rand = rand;
+static const struct l2cap_ops smp_chan_ops = {
+ .name = "Security Manager",
+ .ready = smp_ready_cb,
+ .recv = smp_recv_cb,
+ .alloc_skb = smp_alloc_skb_cb,
+ .teardown = smp_teardown_cb,
+ .resume = smp_resume_cb,
+
+ .new_connection = l2cap_chan_no_new_connection,
+ .state_change = l2cap_chan_no_state_change,
+ .close = l2cap_chan_no_close,
+ .defer = l2cap_chan_no_defer,
+ .suspend = l2cap_chan_no_suspend,
+ .set_shutdown = l2cap_chan_no_set_shutdown,
+ .get_sndtimeo = l2cap_chan_no_get_sndtimeo,
+ .memcpy_fromiovec = l2cap_chan_no_memcpy_fromiovec,
+};
- smp_send_cmd(conn, SMP_CMD_MASTER_IDENT, sizeof(ident), &ident);
+static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
+{
+ struct l2cap_chan *chan;
- *keydist &= ~SMP_DIST_ENC_KEY;
- }
+ BT_DBG("pchan %p", pchan);
- if (*keydist & SMP_DIST_ID_KEY) {
- struct smp_cmd_ident_addr_info addrinfo;
- struct smp_cmd_ident_info idinfo;
+ chan = l2cap_chan_create();
+ if (!chan)
+ return NULL;
- memcpy(idinfo.irk, hdev->irk, sizeof(idinfo.irk));
+ chan->chan_type = pchan->chan_type;
+ chan->ops = &smp_chan_ops;
+ chan->scid = pchan->scid;
+ chan->dcid = chan->scid;
+ chan->imtu = pchan->imtu;
+ chan->omtu = pchan->omtu;
+ chan->mode = pchan->mode;
- smp_send_cmd(conn, SMP_CMD_IDENT_INFO, sizeof(idinfo), &idinfo);
+ BT_DBG("created chan %p", chan);
- /* The hci_conn contains the local identity address
- * after the connection has been established.
- *
- * This is true even when the connection has been
- * established using a resolvable random address.
- */
- bacpy(&addrinfo.bdaddr, &hcon->src);
- addrinfo.addr_type = hcon->src_type;
+ return chan;
+}
- smp_send_cmd(conn, SMP_CMD_IDENT_ADDR_INFO, sizeof(addrinfo),
- &addrinfo);
+static const struct l2cap_ops smp_root_chan_ops = {
+ .name = "Security Manager Root",
+ .new_connection = smp_new_conn_cb,
+
+ /* None of these are implemented for the root channel */
+ .close = l2cap_chan_no_close,
+ .alloc_skb = l2cap_chan_no_alloc_skb,
+ .recv = l2cap_chan_no_recv,
+ .state_change = l2cap_chan_no_state_change,
+ .teardown = l2cap_chan_no_teardown,
+ .ready = l2cap_chan_no_ready,
+ .defer = l2cap_chan_no_defer,
+ .suspend = l2cap_chan_no_suspend,
+ .resume = l2cap_chan_no_resume,
+ .set_shutdown = l2cap_chan_no_set_shutdown,
+ .get_sndtimeo = l2cap_chan_no_get_sndtimeo,
+ .memcpy_fromiovec = l2cap_chan_no_memcpy_fromiovec,
+};
- *keydist &= ~SMP_DIST_ID_KEY;
- }
+int smp_register(struct hci_dev *hdev)
+{
+ struct l2cap_chan *chan;
+ struct crypto_blkcipher *tfm_aes;
- if (*keydist & SMP_DIST_SIGN) {
- struct smp_cmd_sign_info sign;
- struct smp_csrk *csrk;
+ BT_DBG("%s", hdev->name);
- /* Generate a new random key */
- get_random_bytes(sign.csrk, sizeof(sign.csrk));
+ tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm_aes)) {
+ int err = PTR_ERR(tfm_aes);
+ BT_ERR("Unable to create crypto context");
+ return err;
+ }
- csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
- if (csrk) {
- csrk->master = 0x00;
- memcpy(csrk->val, sign.csrk, sizeof(csrk->val));
- }
- smp->slave_csrk = csrk;
+ chan = l2cap_chan_create();
+ if (!chan) {
+ crypto_free_blkcipher(tfm_aes);
+ return -ENOMEM;
+ }
- smp_send_cmd(conn, SMP_CMD_SIGN_INFO, sizeof(sign), &sign);
+ chan->data = tfm_aes;
- *keydist &= ~SMP_DIST_SIGN;
- }
+ l2cap_add_scid(chan, L2CAP_CID_SMP);
- /* If there are still keys to be received wait for them */
- if ((smp->remote_key_dist & 0x07))
- return 0;
+ l2cap_chan_set_defaults(chan);
- clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags);
- cancel_delayed_work_sync(&conn->security_timer);
- set_bit(SMP_FLAG_COMPLETE, &smp->flags);
- smp_notify_keys(conn);
+ bacpy(&chan->src, &hdev->bdaddr);
+ chan->src_type = BDADDR_LE_PUBLIC;
+ chan->state = BT_LISTEN;
+ chan->mode = L2CAP_MODE_BASIC;
+ chan->imtu = L2CAP_DEFAULT_MTU;
+ chan->ops = &smp_root_chan_ops;
- smp_chan_destroy(conn);
+ hdev->smp_data = chan;
return 0;
}
+
+void smp_unregister(struct hci_dev *hdev)
+{
+ struct l2cap_chan *chan = hdev->smp_data;
+ struct crypto_blkcipher *tfm_aes;
+
+ if (!chan)
+ return;
+
+ BT_DBG("%s chan %p", hdev->name, chan);
+
+ tfm_aes = chan->data;
+ if (tfm_aes) {
+ chan->data = NULL;
+ crypto_free_blkcipher(tfm_aes);
+ }
+
+ hdev->smp_data = NULL;
+ l2cap_chan_put(chan);
+}
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 5a8dc36460a1..86a683a8b491 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -102,6 +102,8 @@ struct smp_cmd_security_req {
__u8 auth_req;
} __packed;
+#define SMP_CMD_MAX 0x0b
+
#define SMP_PASSKEY_ENTRY_FAILED 0x01
#define SMP_OOB_NOT_AVAIL 0x02
#define SMP_AUTH_REQUIREMENTS 0x03
@@ -116,17 +118,30 @@ struct smp_cmd_security_req {
#define SMP_MIN_ENC_KEY_SIZE 7
#define SMP_MAX_ENC_KEY_SIZE 16
+/* LTK types used in internal storage (struct smp_ltk) */
+enum {
+ SMP_STK,
+ SMP_LTK,
+ SMP_LTK_SLAVE,
+};
+
+static inline u8 smp_ltk_sec_level(struct smp_ltk *key)
+{
+ if (key->authenticated)
+ return BT_SECURITY_HIGH;
+
+ return BT_SECURITY_MEDIUM;
+}
+
/* SMP Commands */
bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level);
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);
-int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb);
-int smp_distribute_keys(struct l2cap_conn *conn);
int smp_user_confirm_reply(struct hci_conn *conn, u16 mgmt_op, __le32 passkey);
-void smp_chan_destroy(struct l2cap_conn *conn);
+bool smp_irk_matches(struct hci_dev *hdev, u8 irk[16], bdaddr_t *bdaddr);
+int smp_generate_rpa(struct hci_dev *hdev, u8 irk[16], bdaddr_t *rpa);
-bool smp_irk_matches(struct crypto_blkcipher *tfm, u8 irk[16],
- bdaddr_t *bdaddr);
-int smp_generate_rpa(struct crypto_blkcipher *tfm, u8 irk[16], bdaddr_t *rpa);
+int smp_register(struct hci_dev *hdev);
+void smp_unregister(struct hci_dev *hdev);
#endif /* __SMP_H */
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 8590b942bffa..fd7ee03c59b3 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -10,7 +10,9 @@ bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
-bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
+bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o
+
+obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1a755a1e5410..44425aff7cba 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -161,7 +161,7 @@ static int __init br_init(void)
if (err)
goto err_out1;
- err = br_netfilter_init();
+ err = br_nf_core_init();
if (err)
goto err_out2;
@@ -179,11 +179,16 @@ static int __init br_init(void)
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
+ pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
+ "deprecated. Update your scripts to load br_netfilter if you "
+ "need this.\n");
+
return 0;
+
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
- br_netfilter_fini();
+ br_nf_core_fini();
err_out2:
unregister_pernet_subsys(&br_net_ops);
err_out1:
@@ -196,20 +201,17 @@ err_out:
static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
-
br_netlink_fini();
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
-
unregister_pernet_subsys(&br_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
- br_netfilter_fini();
+ br_nf_core_fini();
#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = NULL;
#endif
-
br_fdb_fini();
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 568cccd39a3d..ffd379db5938 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,7 +36,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
u16 vid = 0;
rcu_read_lock();
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
br_nf_pre_routing_finish_bridge_slow(skb);
rcu_read_unlock();
@@ -88,12 +88,17 @@ out:
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
+ int err;
br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!br->stats)
return -ENOMEM;
- return 0;
+ err = br_vlan_init(br);
+ if (err)
+ free_percpu(br->stats);
+
+ return err;
}
static int br_dev_open(struct net_device *dev)
@@ -167,7 +172,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
dev->mtu = new_mtu;
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* remember the MTU in the rtable for PMTU */
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
#endif
@@ -389,5 +394,4 @@ void br_dev_setup(struct net_device *dev)
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
br_multicast_init(br);
- br_vlan_init(br);
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b524c36c1273..6f6c95cfe8f2 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -93,7 +93,7 @@ static void fdb_rcu_free(struct rcu_head *head)
static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr)
{
int err;
- struct net_bridge_port *p, *tmp;
+ struct net_bridge_port *p;
ASSERT_RTNL();
@@ -107,11 +107,9 @@ static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr)
return;
undo:
- list_for_each_entry(tmp, &br->port_list, list) {
- if (tmp == p)
- break;
- if (!br_promisc_port(tmp))
- dev_uc_del(tmp->dev, addr);
+ list_for_each_entry_continue_reverse(p, &br->port_list, list) {
+ if (!br_promisc_port(p))
+ dev_uc_del(p->dev, addr);
}
}
@@ -631,7 +629,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
- if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
+ if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
goto nla_put_failure;
return nlmsg_end(skb, nlh);
@@ -678,6 +676,7 @@ errout:
int br_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
+ struct net_device *filter_dev,
int idx)
{
struct net_bridge *br = netdev_priv(dev);
@@ -693,6 +692,19 @@ int br_fdb_dump(struct sk_buff *skb,
if (idx < cb->args[0])
goto skip;
+ if (filter_dev &&
+ (!f->dst || f->dst->dev != filter_dev)) {
+ if (filter_dev != dev)
+ goto skip;
+ /* !f->dst is a speacial case for bridge
+ * It means the MAC belongs to the bridge
+ * Therefore need a little more filtering
+ * we only want to dump the !f->dst case
+ */
+ if (f->dst)
+ goto skip;
+ }
+
if (fdb_fill_info(skb, br, f,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 056b67b0e277..992ec49a96aa 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -49,6 +49,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
return 0;
}
+EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct sk_buff *skb)
{
@@ -56,6 +57,7 @@ int br_forward_finish(struct sk_buff *skb)
br_dev_queue_push_xmit);
}
+EXPORT_SYMBOL_GPL(br_forward_finish);
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 3eca3fdf8fe1..ed307db7a12b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -252,12 +252,12 @@ static void del_nbp(struct net_bridge_port *p)
br_fdb_delete_by_port(br, p, 1);
nbp_update_port_count(br);
+ netdev_upper_dev_unlink(dev, br->dev);
+
dev->priv_flags &= ~IFF_BRIDGE_PORT;
netdev_rx_handler_unregister(dev);
- netdev_upper_dev_unlink(dev, br->dev);
-
br_multicast_del_port(p);
kobject_uevent(&p->kobj, KOBJ_REMOVE);
@@ -332,7 +332,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
p->port_no = index;
p->flags = BR_LEARNING | BR_FLOOD;
br_init_port(p);
- p->state = BR_STATE_DISABLED;
+ br_set_state(p, BR_STATE_DISABLED);
br_stp_port_timer_init(p);
br_multicast_add_port(p);
@@ -344,7 +344,7 @@ int br_add_bridge(struct net *net, const char *name)
struct net_device *dev;
int res;
- dev = alloc_netdev(sizeof(struct net_bridge), name,
+ dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
br_dev_setup);
if (!dev)
@@ -476,16 +476,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (err)
goto err3;
- err = netdev_master_upper_dev_link(dev, br->dev);
+ err = netdev_rx_handler_register(dev, br_handle_frame, p);
if (err)
goto err4;
- err = netdev_rx_handler_register(dev, br_handle_frame, p);
+ dev->priv_flags |= IFF_BRIDGE_PORT;
+
+ err = netdev_master_upper_dev_link(dev, br->dev);
if (err)
goto err5;
- dev->priv_flags |= IFF_BRIDGE_PORT;
-
dev_disable_lro(dev);
list_add_rcu(&p->list, &br->port_list);
@@ -500,6 +500,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
+ if (nbp_vlan_init(p))
+ netdev_err(dev, "failed to initialize vlan filtering on this port\n");
+
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);
@@ -520,7 +523,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
return 0;
err5:
- netdev_upper_dev_unlink(dev, br->dev);
+ dev->priv_flags &= ~IFF_BRIDGE_PORT;
+ netdev_rx_handler_unregister(dev);
err4:
br_netpoll_disable(p);
err3:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 366c43649079..6fd5522df696 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -140,6 +140,7 @@ drop:
kfree_skb(skb);
goto out;
}
+EXPORT_SYMBOL_GPL(br_handle_frame_finish);
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct sk_buff *skb)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index abfa0b65a111..648d79ccf462 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
}
if (slot)
- hlist_add_after_rcu(slot, &port->rlist);
+ hlist_add_behind_rcu(&port->rlist, slot);
else
hlist_add_head_rcu(&port->rlist, &br->router_list);
}
@@ -1822,7 +1822,7 @@ static void br_multicast_query_expired(struct net_bridge *br,
if (query->startup_sent < br->multicast_startup_query_count)
query->startup_sent++;
- rcu_assign_pointer(querier, NULL);
+ RCU_INIT_POINTER(querier, NULL);
br_multicast_send_query(br, NULL, query);
spin_unlock(&br->multicast_lock);
}
@@ -2216,6 +2216,43 @@ unlock:
EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
/**
+ * br_multicast_has_querier_anywhere - Checks for a querier on a bridge
+ * @dev: The bridge port providing the bridge on which to check for a querier
+ * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a valid querier exists anywhere on the bridged link layer.
+ * Otherwise returns false.
+ */
+bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto)
+{
+ struct net_bridge *br;
+ struct net_bridge_port *port;
+ struct ethhdr eth;
+ bool ret = false;
+
+ rcu_read_lock();
+ if (!br_port_exists(dev))
+ goto unlock;
+
+ port = br_port_get_rcu(dev);
+ if (!port || !port->br)
+ goto unlock;
+
+ br = port->br;
+
+ memset(&eth, 0, sizeof(eth));
+ eth.h_proto = htons(proto);
+
+ ret = br_multicast_querier_exists(br, &eth);
+
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere);
+
+/**
* br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port
* @dev: The bridge port adjacent to which to check for a querier
* @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index a615264cf01a..1bada53bb195 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,66 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
-static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
-{
-}
-
-static void fake_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb)
-{
-}
-
-static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
- return NULL;
-}
-
-static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr)
-{
- return NULL;
-}
-
-static unsigned int fake_mtu(const struct dst_entry *dst)
-{
- return dst->dev->mtu;
-}
-
-static struct dst_ops fake_dst_ops = {
- .family = AF_INET,
- .protocol = cpu_to_be16(ETH_P_IP),
- .update_pmtu = fake_update_pmtu,
- .redirect = fake_redirect,
- .cow_metrics = fake_cow_metrics,
- .neigh_lookup = fake_neigh_lookup,
- .mtu = fake_mtu,
-};
-
-/*
- * Initialize bogus route table used to keep netfilter happy.
- * Currently, we fill in the PMTU entry because netfilter
- * refragmentation needs it, and the rt_flags entry because
- * ipt_REJECT needs it. Future netfilter modules might
- * require us to fill additional fields.
- */
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
- [RTAX_MTU - 1] = 1500,
-};
-
-void br_netfilter_rtable_init(struct net_bridge *br)
-{
- struct rtable *rt = &br->fake_rtable;
-
- atomic_set(&rt->dst.__refcnt, 1);
- rt->dst.dev = br->dev;
- rt->dst.path = &rt->dst;
- dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
- rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
- rt->dst.ops = &fake_dst_ops;
-}
-
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
struct net_bridge_port *port;
@@ -245,14 +185,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
skb->nf_bridge->data, header_size);
}
-static inline void nf_bridge_update_protocol(struct sk_buff *skb)
-{
- if (skb->nf_bridge->mask & BRNF_8021Q)
- skb->protocol = htons(ETH_P_8021Q);
- else if (skb->nf_bridge->mask & BRNF_PPPoE)
- skb->protocol = htons(ETH_P_PPP_SES);
-}
-
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
@@ -320,26 +252,6 @@ drop:
return -1;
}
-/* Fill in the header for fragmented IP packets handled by
- * the IPv4 connection tracking code.
- */
-int nf_bridge_copy_header(struct sk_buff *skb)
-{
- int err;
- unsigned int header_size;
-
- nf_bridge_update_protocol(skb);
- header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow_head(skb, header_size);
- if (err)
- return err;
-
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return 0;
-}
-
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
* bridge PRE_ROUTING hook. */
@@ -404,6 +316,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+ /* FIXME Need to refragment */
ret = neigh->output(neigh, skb);
}
neigh_release(neigh);
@@ -459,6 +372,10 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
struct rtable *rt;
int err;
+ int frag_max_size;
+
+ frag_max_size = IPCB(skb)->frag_max_size;
+ BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
if (nf_bridge->mask & BRNF_PKT_TYPE) {
skb->pkt_type = PACKET_OTHERHOST;
@@ -863,13 +780,19 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
int ret;
+ int frag_max_size;
+ /* This is wrong! We should preserve the original fragment
+ * boundaries by preserving frag_list rather than refragmenting.
+ */
if (skb->protocol == htons(ETH_P_IP) &&
skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
!skb_is_gso(skb)) {
+ frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
if (br_parse_ip_options(skb))
/* Drop invalid packet */
return NF_DROP;
+ IPCB(skb)->frag_max_size = frag_max_size;
ret = ip_fragment(skb, br_dev_queue_push_xmit);
} else
ret = br_dev_queue_push_xmit(skb);
@@ -944,6 +867,11 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
return NF_ACCEPT;
}
+void br_netfilter_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(br_netfilter_enable);
+
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
* br_dev_queue_push_xmit is called afterwards */
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
@@ -1059,38 +987,42 @@ static struct ctl_table brnf_table[] = {
};
#endif
-int __init br_netfilter_init(void)
+static int __init br_netfilter_init(void)
{
int ret;
- ret = dst_entries_init(&fake_dst_ops);
+ ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
if (ret < 0)
return ret;
- ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
- if (ret < 0) {
- dst_entries_destroy(&fake_dst_ops);
- return ret;
- }
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
- nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
- dst_entries_destroy(&fake_dst_ops);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err1;
}
#endif
printk(KERN_NOTICE "Bridge firewalling registered\n");
return 0;
+err1:
+ nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ return ret;
}
-void br_netfilter_fini(void)
+static void __exit br_netfilter_fini(void)
{
nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(brnf_sysctl_header);
#endif
- dst_entries_destroy(&fake_dst_ops);
}
+
+module_init(br_netfilter_init);
+module_exit(br_netfilter_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 26edb518b839..2ff9706647f2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -208,7 +208,6 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
int err = 0;
struct net_bridge_port *port = br_port_get_rtnl(dev);
- /* not a bridge port and */
if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
goto out;
@@ -258,9 +257,6 @@ static int br_afspec(struct net_bridge *br,
} else
err = br_vlan_add(br, vinfo->vid, vinfo->flags);
- if (err)
- break;
-
break;
case RTM_DELLINK:
@@ -277,7 +273,7 @@ static int br_afspec(struct net_bridge *br,
return err;
}
-static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
+static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_STATE] = { .type = NLA_U8 },
[IFLA_BRPORT_COST] = { .type = NLA_U32 },
[IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
@@ -305,7 +301,7 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
(!netif_oper_up(p->dev) && state != BR_STATE_DISABLED))
return -ENETDOWN;
- p->state = state;
+ br_set_state(p, state);
br_log_state(p);
br_port_state_selection(p->br);
return 0;
@@ -383,7 +379,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
if (p && protinfo) {
if (protinfo->nla_type & NLA_F_NESTED) {
err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
- protinfo, ifla_brport_policy);
+ protinfo, br_port_policy);
if (err)
return err;
@@ -462,6 +458,88 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
return register_netdevice(dev);
}
+static int br_port_slave_changelink(struct net_device *brdev,
+ struct net_device *dev,
+ struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ if (!data)
+ return 0;
+ return br_setport(br_port_get_rtnl(dev), data);
+}
+
+static int br_port_fill_slave_info(struct sk_buff *skb,
+ const struct net_device *brdev,
+ const struct net_device *dev)
+{
+ return br_port_fill_attrs(skb, br_port_get_rtnl(dev));
+}
+
+static size_t br_port_get_slave_size(const struct net_device *brdev,
+ const struct net_device *dev)
+{
+ return br_port_info_size();
+}
+
+static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
+ [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 },
+ [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 },
+ [IFLA_BR_MAX_AGE] = { .type = NLA_U32 },
+};
+
+static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct net_bridge *br = netdev_priv(brdev);
+ int err;
+
+ if (!data)
+ return 0;
+
+ if (data[IFLA_BR_FORWARD_DELAY]) {
+ err = br_set_forward_delay(br, nla_get_u32(data[IFLA_BR_FORWARD_DELAY]));
+ if (err)
+ return err;
+ }
+
+ if (data[IFLA_BR_HELLO_TIME]) {
+ err = br_set_hello_time(br, nla_get_u32(data[IFLA_BR_HELLO_TIME]));
+ if (err)
+ return err;
+ }
+
+ if (data[IFLA_BR_MAX_AGE]) {
+ err = br_set_max_age(br, nla_get_u32(data[IFLA_BR_MAX_AGE]));
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static size_t br_get_size(const struct net_device *brdev)
+{
+ return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_HELLO_TIME */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_MAX_AGE */
+ 0;
+}
+
+static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
+{
+ struct net_bridge *br = netdev_priv(brdev);
+ u32 forward_delay = jiffies_to_clock_t(br->forward_delay);
+ u32 hello_time = jiffies_to_clock_t(br->hello_time);
+ u32 age_time = jiffies_to_clock_t(br->max_age);
+
+ if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
+ nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
+ nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static size_t br_get_link_af_size(const struct net_device *dev)
{
struct net_port_vlans *pv;
@@ -486,12 +564,23 @@ static struct rtnl_af_ops br_af_ops = {
};
struct rtnl_link_ops br_link_ops __read_mostly = {
- .kind = "bridge",
- .priv_size = sizeof(struct net_bridge),
- .setup = br_dev_setup,
- .validate = br_validate,
- .newlink = br_dev_newlink,
- .dellink = br_dev_delete,
+ .kind = "bridge",
+ .priv_size = sizeof(struct net_bridge),
+ .setup = br_dev_setup,
+ .maxtype = IFLA_BRPORT_MAX,
+ .policy = br_policy,
+ .validate = br_validate,
+ .newlink = br_dev_newlink,
+ .changelink = br_changelink,
+ .dellink = br_dev_delete,
+ .get_size = br_get_size,
+ .fill_info = br_fill_info,
+
+ .slave_maxtype = IFLA_BRPORT_MAX,
+ .slave_policy = br_port_policy,
+ .slave_changelink = br_port_slave_changelink,
+ .get_slave_size = br_port_get_slave_size,
+ .fill_slave_info = br_port_fill_slave_info,
};
int __init br_netlink_init(void)
@@ -513,7 +602,7 @@ out_af:
return err;
}
-void __exit br_netlink_fini(void)
+void br_netlink_fini(void)
{
br_mdb_uninit();
rtnl_af_unregister(&br_af_ops);
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
new file mode 100644
index 000000000000..387cb3bd017c
--- /dev/null
+++ b/net/bridge/br_nf_core.c
@@ -0,0 +1,96 @@
+/*
+ * Handle firewalling core
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ * Bart De Schuymer <bdschuym@pandora.be>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Lennert dedicates this file to Kerstin Wurdinger.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <net/route.h>
+
+#include "br_private.h"
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void fake_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
+{
+}
+
+static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+ return NULL;
+}
+
+static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
+{
+ return NULL;
+}
+
+static unsigned int fake_mtu(const struct dst_entry *dst)
+{
+ return dst->dev->mtu;
+}
+
+static struct dst_ops fake_dst_ops = {
+ .family = AF_INET,
+ .protocol = cpu_to_be16(ETH_P_IP),
+ .update_pmtu = fake_update_pmtu,
+ .redirect = fake_redirect,
+ .cow_metrics = fake_cow_metrics,
+ .neigh_lookup = fake_neigh_lookup,
+ .mtu = fake_mtu,
+};
+
+/*
+ * Initialize bogus route table used to keep netfilter happy.
+ * Currently, we fill in the PMTU entry because netfilter
+ * refragmentation needs it, and the rt_flags entry because
+ * ipt_REJECT needs it. Future netfilter modules might
+ * require us to fill additional fields.
+ */
+static const u32 br_dst_default_metrics[RTAX_MAX] = {
+ [RTAX_MTU - 1] = 1500,
+};
+
+void br_netfilter_rtable_init(struct net_bridge *br)
+{
+ struct rtable *rt = &br->fake_rtable;
+
+ atomic_set(&rt->dst.__refcnt, 1);
+ rt->dst.dev = br->dev;
+ rt->dst.path = &rt->dst;
+ dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+ rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
+ rt->dst.ops = &fake_dst_ops;
+}
+
+int __init br_nf_core_init(void)
+{
+ return dst_entries_init(&fake_dst_ops);
+}
+
+void br_nf_core_fini(void)
+{
+ dst_entries_destroy(&fake_dst_ops);
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 23caf5b0309e..4d783d071305 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -221,7 +221,7 @@ struct net_bridge
struct pcpu_sw_netstats __percpu *stats;
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct rtable fake_rtable;
bool nf_call_iptables;
bool nf_call_ip6tables;
@@ -299,16 +299,24 @@ struct net_bridge
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_enabled;
__be16 vlan_proto;
+ u16 default_pvid;
struct net_port_vlans __rcu *vlan_info;
#endif
};
struct br_input_skb_cb {
struct net_device *brdev;
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
int igmp;
int mrouters_only;
#endif
+
+ u16 frag_max_size;
+
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ bool vlan_filtered;
+#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -399,7 +407,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 nlh_flags);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev, int idx);
+ struct net_device *dev, struct net_device *fdev, int idx);
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
@@ -601,11 +609,13 @@ bool br_vlan_find(struct net_bridge *br, u16 vid);
void br_recalculate_fwd_mask(struct net_bridge *br);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
-void br_vlan_init(struct net_bridge *br);
+int br_vlan_init(struct net_bridge *br);
+int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
+int nbp_vlan_init(struct net_bridge_port *port);
static inline struct net_port_vlans *br_get_vlan_info(
const struct net_bridge *br)
@@ -638,11 +648,11 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
static inline u16 br_get_pvid(const struct net_port_vlans *v)
{
- /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if
- * vid wasn't set
- */
+ if (!v)
+ return 0;
+
smp_rmb();
- return v->pvid ?: VLAN_N_VID;
+ return v->pvid;
}
static inline int br_vlan_enabled(struct net_bridge *br)
@@ -701,8 +711,9 @@ static inline void br_recalculate_fwd_mask(struct net_bridge *br)
{
}
-static inline void br_vlan_init(struct net_bridge *br)
+static inline int br_vlan_init(struct net_bridge *br)
{
+ return 0;
}
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
@@ -735,13 +746,18 @@ static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
return false;
}
+static inline int nbp_vlan_init(struct net_bridge_port *port)
+{
+ return 0;
+}
+
static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag)
{
return 0;
}
static inline u16 br_get_pvid(const struct net_port_vlans *v)
{
- return VLAN_N_VID; /* Returns invalid vid */
+ return 0;
}
static inline int br_vlan_enabled(struct net_bridge *br)
@@ -751,18 +767,19 @@ static inline int br_vlan_enabled(struct net_bridge *br)
#endif
/* br_netfilter.c */
-#ifdef CONFIG_BRIDGE_NETFILTER
-int br_netfilter_init(void);
-void br_netfilter_fini(void);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_core_init(void);
+void br_nf_core_fini(void);
void br_netfilter_rtable_init(struct net_bridge *);
#else
-#define br_netfilter_init() (0)
-#define br_netfilter_fini() do { } while (0)
+static inline int br_nf_core_init(void) { return 0; }
+static inline void br_nf_core_fini(void) {}
#define br_netfilter_rtable_init(x)
#endif
/* br_stp.c */
void br_log_state(const struct net_bridge_port *p);
+void br_set_state(struct net_bridge_port *p, unsigned int state);
struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no);
void br_init_port(struct net_bridge_port *p);
void br_become_designated_port(struct net_bridge_port *p);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3c86f0538cbb..2b047bcf42a4 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -36,6 +36,11 @@ void br_log_state(const struct net_bridge_port *p)
br_port_state_names[p->state]);
}
+void br_set_state(struct net_bridge_port *p, unsigned int state)
+{
+ p->state = state;
+}
+
/* called under bridge lock */
struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
{
@@ -107,7 +112,7 @@ static void br_root_port_block(const struct net_bridge *br,
br_notice(br, "port %u(%s) tried to become root port (blocked)",
(unsigned int) p->port_no, p->dev->name);
- p->state = BR_STATE_LISTENING;
+ br_set_state(p, BR_STATE_LISTENING);
br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
@@ -387,7 +392,7 @@ static void br_make_blocking(struct net_bridge_port *p)
p->state == BR_STATE_LEARNING)
br_topology_change_detection(p->br);
- p->state = BR_STATE_BLOCKING;
+ br_set_state(p, BR_STATE_BLOCKING);
br_log_state(p);
br_ifinfo_notify(RTM_NEWLINK, p);
@@ -404,13 +409,13 @@ static void br_make_forwarding(struct net_bridge_port *p)
return;
if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) {
- p->state = BR_STATE_FORWARDING;
+ br_set_state(p, BR_STATE_FORWARDING);
br_topology_change_detection(br);
del_timer(&p->forward_delay_timer);
} else if (br->stp_enabled == BR_KERNEL_STP)
- p->state = BR_STATE_LISTENING;
+ br_set_state(p, BR_STATE_LISTENING);
else
- p->state = BR_STATE_LEARNING;
+ br_set_state(p, BR_STATE_LEARNING);
br_multicast_enable_port(p);
br_log_state(p);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 189ba1e7d851..41146872c1b4 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -37,7 +37,7 @@ void br_init_port(struct net_bridge_port *p)
{
p->port_id = br_make_port_id(p->priority, p->port_no);
br_become_designated_port(p);
- p->state = BR_STATE_BLOCKING;
+ br_set_state(p, BR_STATE_BLOCKING);
p->topology_change_ack = 0;
p->config_pending = 0;
}
@@ -100,7 +100,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
wasroot = br_is_root_bridge(br);
br_become_designated_port(p);
- p->state = BR_STATE_DISABLED;
+ br_set_state(p, BR_STATE_DISABLED);
p->topology_change_ack = 0;
p->config_pending = 0;
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 558c46d19e05..4fcaa67750fd 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -87,11 +87,11 @@ static void br_forward_delay_timer_expired(unsigned long arg)
(unsigned int) p->port_no, p->dev->name);
spin_lock(&br->lock);
if (p->state == BR_STATE_LISTENING) {
- p->state = BR_STATE_LEARNING;
+ br_set_state(p, BR_STATE_LEARNING);
mod_timer(&p->forward_delay_timer,
jiffies + br->forward_delay);
} else if (p->state == BR_STATE_LEARNING) {
- p->state = BR_STATE_FORWARDING;
+ br_set_state(p, BR_STATE_FORWARDING);
if (br_is_designated_for_some_port(br))
br_topology_change_detection(br);
netif_carrier_on(br->dev);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c9e2572b15f4..4c97fc50fb70 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -629,7 +629,7 @@ static ssize_t multicast_startup_query_interval_store(
}
static DEVICE_ATTR_RW(multicast_startup_query_interval);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static ssize_t nf_call_iptables_show(
struct device *d, struct device_attribute *attr, char *buf)
{
@@ -725,6 +725,22 @@ static ssize_t vlan_protocol_store(struct device *d,
return store_bridge_parm(d, buf, len, br_vlan_set_proto);
}
static DEVICE_ATTR_RW(vlan_protocol);
+
+static ssize_t default_pvid_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%d\n", br->default_pvid);
+}
+
+static ssize_t default_pvid_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid);
+}
+static DEVICE_ATTR_RW(default_pvid);
#endif
static struct attribute *bridge_attrs[] = {
@@ -763,7 +779,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_multicast_query_response_interval.attr,
&dev_attr_multicast_startup_query_interval.attr,
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
&dev_attr_nf_call_iptables.attr,
&dev_attr_nf_call_ip6tables.attr,
&dev_attr_nf_call_arptables.attr,
@@ -771,6 +787,7 @@ static struct attribute *bridge_attrs[] = {
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
&dev_attr_vlan_filtering.attr,
&dev_attr_vlan_protocol.attr,
+ &dev_attr_default_pvid.attr,
#endif
NULL
};
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 2b2774fe0703..150048fb99b0 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -27,9 +27,13 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
{
if (flags & BRIDGE_VLAN_INFO_PVID)
__vlan_add_pvid(v, vid);
+ else
+ __vlan_delete_pvid(v, vid);
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
set_bit(vid, v->untagged_bitmap);
+ else
+ clear_bit(vid, v->untagged_bitmap);
}
static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
@@ -55,10 +59,8 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
if (p) {
/* Add VLAN to the device filter if it is supported.
- * Stricly speaking, this is not necessary now, since
- * devices are made promiscuous by the bridge, but if
- * that ever changes this code will allow tagged
- * traffic to enter the bridge.
+ * This ensures tagged traffic enters the bridge when
+ * promiscuous mode is disabled by br_manage_promisc().
*/
err = vlan_vid_add(dev, br->vlan_proto, vid);
if (err)
@@ -127,7 +129,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
{
u16 vid;
- if (!br->vlan_enabled)
+ /* If this packet was not filtered at input, let it pass */
+ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
goto out;
/* Vlan filter table must be configured at this point. The
@@ -166,8 +169,10 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
- if (!br->vlan_enabled)
+ if (!br->vlan_enabled) {
+ BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
return true;
+ }
/* If there are no vlan in the permitted list, all packets are
* rejected.
@@ -175,6 +180,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
if (!v)
goto drop;
+ BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
proto = br->vlan_proto;
/* If vlan tx offload is disabled on bridge device and frame was
@@ -183,7 +189,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
*/
if (unlikely(!vlan_tx_tag_present(skb) &&
skb->protocol == proto)) {
- skb = vlan_untag(skb);
+ skb = skb_vlan_untag(skb);
if (unlikely(!skb))
return false;
}
@@ -217,7 +223,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
* See if pvid is set on this port. That tells us which
* vlan untagged or priority-tagged traffic belongs to.
*/
- if (pvid == VLAN_N_VID)
+ if (!pvid)
goto drop;
/* PVID is set on this port. Any untagged or priority-tagged
@@ -253,7 +259,8 @@ bool br_allowed_egress(struct net_bridge *br,
{
u16 vid;
- if (!br->vlan_enabled)
+ /* If this packet was not filtered at input, let it pass */
+ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
return true;
if (!v)
@@ -272,6 +279,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
struct net_bridge *br = p->br;
struct net_port_vlans *v;
+ /* If filtering was disabled at input, let it pass. */
if (!br->vlan_enabled)
return true;
@@ -284,7 +292,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
if (!*vid) {
*vid = br_get_pvid(v);
- if (*vid == VLAN_N_VID)
+ if (!*vid)
return false;
return true;
@@ -491,9 +499,141 @@ err_filt:
goto unlock;
}
-void br_vlan_init(struct net_bridge *br)
+static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid)
+{
+ return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap);
+}
+
+static void br_vlan_disable_default_pvid(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ u16 pvid = br->default_pvid;
+
+ /* Disable default_pvid on all ports where it is still
+ * configured.
+ */
+ if (vlan_default_pvid(br_get_vlan_info(br), pvid))
+ br_vlan_delete(br, pvid);
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (vlan_default_pvid(nbp_get_vlan_info(p), pvid))
+ nbp_vlan_delete(p, pvid);
+ }
+
+ br->default_pvid = 0;
+}
+
+static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
+{
+ struct net_bridge_port *p;
+ u16 old_pvid;
+ int err = 0;
+ unsigned long *changed;
+
+ changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!changed)
+ return -ENOMEM;
+
+ old_pvid = br->default_pvid;
+
+ /* Update default_pvid config only if we do not conflict with
+ * user configuration.
+ */
+ if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) &&
+ !br_vlan_find(br, pvid)) {
+ err = br_vlan_add(br, pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ if (err)
+ goto out;
+ br_vlan_delete(br, old_pvid);
+ set_bit(0, changed);
+ }
+
+ list_for_each_entry(p, &br->port_list, list) {
+ /* Update default_pvid config only if we do not conflict with
+ * user configuration.
+ */
+ if ((old_pvid &&
+ !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) ||
+ nbp_vlan_find(p, pvid))
+ continue;
+
+ err = nbp_vlan_add(p, pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ if (err)
+ goto err_port;
+ nbp_vlan_delete(p, old_pvid);
+ set_bit(p->port_no, changed);
+ }
+
+ br->default_pvid = pvid;
+
+out:
+ kfree(changed);
+ return err;
+
+err_port:
+ list_for_each_entry_continue_reverse(p, &br->port_list, list) {
+ if (!test_bit(p->port_no, changed))
+ continue;
+
+ if (old_pvid)
+ nbp_vlan_add(p, old_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ nbp_vlan_delete(p, pvid);
+ }
+
+ if (test_bit(0, changed)) {
+ if (old_pvid)
+ br_vlan_add(br, old_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ br_vlan_delete(br, pvid);
+ }
+ goto out;
+}
+
+int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
+{
+ u16 pvid = val;
+ int err = 0;
+
+ if (val >= VLAN_VID_MASK)
+ return -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (pvid == br->default_pvid)
+ goto unlock;
+
+ /* Only allow default pvid change when filtering is disabled */
+ if (br->vlan_enabled) {
+ pr_info_once("Please disable vlan filtering to change default_pvid\n");
+ err = -EPERM;
+ goto unlock;
+ }
+
+ if (!pvid)
+ br_vlan_disable_default_pvid(br);
+ else
+ err = __br_vlan_set_default_pvid(br, pvid);
+
+unlock:
+ rtnl_unlock();
+ return err;
+}
+
+int br_vlan_init(struct net_bridge *br)
{
br->vlan_proto = htons(ETH_P_8021Q);
+ br->default_pvid = 1;
+ return br_vlan_add(br, 1,
+ BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED);
}
/* Must be protected by RTNL.
@@ -585,3 +725,12 @@ out:
rcu_read_unlock();
return found;
}
+
+int nbp_vlan_init(struct net_bridge_port *p)
+{
+ return p->br->default_pvid ?
+ nbp_vlan_add(p, p->br->default_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED) :
+ 0;
+}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 629dc77874a9..9cebf47ac840 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -14,6 +14,15 @@ config NFT_BRIDGE_META
help
Add support for bridge dedicated meta key.
+config NFT_BRIDGE_REJECT
+ tristate "Netfilter nf_tables bridge reject support"
+ depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6
+ help
+ Add support to reject packets.
+
+config NF_LOG_BRIDGE
+ tristate "Bridge packet logging"
+
endif # NF_TABLES_BRIDGE
menuconfig BRIDGE_NF_EBTABLES
@@ -202,22 +211,6 @@ config BRIDGE_EBT_LOG
To compile it as a module, choose M here. If unsure, say N.
-config BRIDGE_EBT_ULOG
- tristate "ebt: ulog support (OBSOLETE)"
- help
- This option enables the old bridge-specific "ebt_ulog" implementation
- which has been obsoleted by the new "nfnetlink_log" code (see
- CONFIG_NETFILTER_NETLINK_LOG).
-
- This option adds the ulog watcher, that you can use in any rule
- in any ebtables table. The packet is passed to a userspace
- logging daemon using netlink multicast sockets. This differs
- from the log watcher in the sense that the complete packet is
- sent to userspace instead of a descriptive text and that
- netlink multicast sockets are used instead of the syslog.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config BRIDGE_EBT_NFLOG
tristate "ebt: nflog support"
help
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 6f2f3943d66f..be4d0cea78ce 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -4,6 +4,10 @@
obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
+obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
+
+# packet logging
+obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
@@ -33,5 +37,4 @@ obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o
# watchers
obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o
-obj-$(CONFIG_BRIDGE_EBT_ULOG) += ebt_ulog.o
obj-$(CONFIG_BRIDGE_EBT_NFLOG) += ebt_nflog.o
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 5322a36867a3..17f2e4bc2a29 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -186,6 +186,10 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
li.u.log.level = info->loglevel;
li.u.log.logflags = info->bitmask;
+ /* Remember that we have to use ebt_log_packet() not to break backward
+ * compatibility. We cannot use the default bridge packet logger via
+ * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo
+ */
if (info->bitmask & EBT_LOG_NFLOG)
nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
par->in, par->out, &li, "%s", info->prefix);
@@ -205,54 +209,13 @@ static struct xt_target ebt_log_tg_reg __read_mostly = {
.me = THIS_MODULE,
};
-static struct nf_logger ebt_log_logger __read_mostly = {
- .name = "ebt_log",
- .logfn = &ebt_log_packet,
- .me = THIS_MODULE,
-};
-
-static int __net_init ebt_log_net_init(struct net *net)
-{
- nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger);
- return 0;
-}
-
-static void __net_exit ebt_log_net_fini(struct net *net)
-{
- nf_log_unset(net, &ebt_log_logger);
-}
-
-static struct pernet_operations ebt_log_net_ops = {
- .init = ebt_log_net_init,
- .exit = ebt_log_net_fini,
-};
-
static int __init ebt_log_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&ebt_log_net_ops);
- if (ret < 0)
- goto err_pernet;
-
- ret = xt_register_target(&ebt_log_tg_reg);
- if (ret < 0)
- goto err_target;
-
- nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
-
- return ret;
-
-err_target:
- unregister_pernet_subsys(&ebt_log_net_ops);
-err_pernet:
- return ret;
+ return xt_register_target(&ebt_log_tg_reg);
}
static void __exit ebt_log_fini(void)
{
- unregister_pernet_subsys(&ebt_log_net_ops);
- nf_log_unregister(&ebt_log_logger);
xt_unregister_target(&ebt_log_tg_reg);
}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
deleted file mode 100644
index 7c470c371e14..000000000000
--- a/net/bridge/netfilter/ebt_ulog.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * netfilter module for userspace bridged Ethernet frames logging daemons
- *
- * Authors:
- * Bart De Schuymer <bdschuym@pandora.be>
- * Harald Welte <laforge@netfilter.org>
- *
- * November, 2004
- *
- * Based on ipt_ULOG.c, which is
- * (C) 2000-2002 by Harald Welte <laforge@netfilter.org>
- *
- * This module accepts two parameters:
- *
- * nlbufsiz:
- * The parameter specifies how big the buffer for each netlink multicast
- * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
- * get accumulated in the kernel until they are sent to userspace. It is
- * NOT possible to allocate more than 128kB, and it is strongly discouraged,
- * because atomically allocating 128kB inside the network rx softirq is not
- * reliable. Please also keep in mind that this buffer size is allocated for
- * each nlgroup you are using, so the total kernel memory usage increases
- * by that factor.
- *
- * flushtimeout:
- * Specify, after how many hundredths of a second the queue should be
- * flushed even if it is not full yet.
- *
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/socket.h>
-#include <linux/skbuff.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <net/netlink.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_ulog.h>
-#include <net/netfilter/nf_log.h>
-#include <net/netns/generic.h>
-#include <net/sock.h>
-#include "../br_private.h"
-
-static unsigned int nlbufsiz = NLMSG_GOODSIZE;
-module_param(nlbufsiz, uint, 0600);
-MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) "
- "(defaults to 4096)");
-
-static unsigned int flushtimeout = 10;
-module_param(flushtimeout, uint, 0600);
-MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) "
- "(defaults to 10)");
-
-typedef struct {
- unsigned int qlen; /* number of nlmsgs' in the skb */
- struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */
- struct sk_buff *skb; /* the pre-allocated skb */
- struct timer_list timer; /* the timer function */
- spinlock_t lock; /* the per-queue lock */
-} ebt_ulog_buff_t;
-
-static int ebt_ulog_net_id __read_mostly;
-struct ebt_ulog_net {
- unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
- ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
- struct sock *ebtulognl;
-};
-
-static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
-{
- return net_generic(net, ebt_ulog_net_id);
-}
-
-/* send one ulog_buff_t to userspace */
-static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
-{
- ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
-
- del_timer(&ub->timer);
-
- if (!ub->skb)
- return;
-
- /* last nlmsg needs NLMSG_DONE */
- if (ub->qlen > 1)
- ub->lastnlh->nlmsg_type = NLMSG_DONE;
-
- NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
- netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
-
- ub->qlen = 0;
- ub->skb = NULL;
-}
-
-/* timer function to flush queue in flushtimeout time */
-static void ulog_timer(unsigned long data)
-{
- struct ebt_ulog_net *ebt = container_of((void *)data,
- struct ebt_ulog_net,
- nlgroup[*(unsigned int *)data]);
-
- ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
- spin_lock_bh(&ub->lock);
- if (ub->skb)
- ulog_send(ebt, *(unsigned int *)data);
- spin_unlock_bh(&ub->lock);
-}
-
-static struct sk_buff *ulog_alloc_skb(unsigned int size)
-{
- struct sk_buff *skb;
- unsigned int n;
-
- n = max(size, nlbufsiz);
- skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
- if (!skb) {
- if (n > size) {
- /* try to allocate only as much as we need for
- * current packet */
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb)
- pr_debug("cannot even allocate buffer of size %ub\n",
- size);
- }
- }
-
- return skb;
-}
-
-static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct ebt_ulog_info *uloginfo,
- const char *prefix)
-{
- ebt_ulog_packet_msg_t *pm;
- size_t size, copy_len;
- struct nlmsghdr *nlh;
- struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
- unsigned int group = uloginfo->nlgroup;
- ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
- spinlock_t *lock = &ub->lock;
- ktime_t kt;
-
- if ((uloginfo->cprange == 0) ||
- (uloginfo->cprange > skb->len + ETH_HLEN))
- copy_len = skb->len + ETH_HLEN;
- else
- copy_len = uloginfo->cprange;
-
- size = nlmsg_total_size(sizeof(*pm) + copy_len);
- if (size > nlbufsiz) {
- pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
- return;
- }
-
- spin_lock_bh(lock);
-
- if (!ub->skb) {
- if (!(ub->skb = ulog_alloc_skb(size)))
- goto unlock;
- } else if (size > skb_tailroom(ub->skb)) {
- ulog_send(ebt, group);
-
- if (!(ub->skb = ulog_alloc_skb(size)))
- goto unlock;
- }
-
- nlh = nlmsg_put(ub->skb, 0, ub->qlen, 0,
- size - NLMSG_ALIGN(sizeof(*nlh)), 0);
- if (!nlh) {
- kfree_skb(ub->skb);
- ub->skb = NULL;
- goto unlock;
- }
- ub->qlen++;
-
- pm = nlmsg_data(nlh);
- memset(pm, 0, sizeof(*pm));
-
- /* Fill in the ulog data */
- pm->version = EBT_ULOG_VERSION;
- kt = ktime_get_real();
- pm->stamp = ktime_to_timeval(kt);
- if (ub->qlen == 1)
- ub->skb->tstamp = kt;
- pm->data_len = copy_len;
- pm->mark = skb->mark;
- pm->hook = hooknr;
- if (uloginfo->prefix != NULL)
- strcpy(pm->prefix, uloginfo->prefix);
-
- if (in) {
- strcpy(pm->physindev, in->name);
- /* If in isn't a bridge, then physindev==indev */
- if (br_port_exists(in))
- /* rcu_read_lock()ed by nf_hook_slow */
- strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
- else
- strcpy(pm->indev, in->name);
- }
-
- if (out) {
- /* If out exists, then out is a bridge port */
- strcpy(pm->physoutdev, out->name);
- /* rcu_read_lock()ed by nf_hook_slow */
- strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
- }
-
- if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0)
- BUG();
-
- if (ub->qlen > 1)
- ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
-
- ub->lastnlh = nlh;
-
- if (ub->qlen >= uloginfo->qthreshold)
- ulog_send(ebt, group);
- else if (!timer_pending(&ub->timer)) {
- ub->timer.expires = jiffies + flushtimeout * HZ / 100;
- add_timer(&ub->timer);
- }
-
-unlock:
- spin_unlock_bh(lock);
-}
-
-/* this function is registered with the netfilter core */
-static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
- const struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, const struct nf_loginfo *li,
- const char *prefix)
-{
- struct ebt_ulog_info loginfo;
-
- if (!li || li->type != NF_LOG_TYPE_ULOG) {
- loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP;
- loginfo.cprange = 0;
- loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD;
- loginfo.prefix[0] = '\0';
- } else {
- loginfo.nlgroup = li->u.ulog.group;
- loginfo.cprange = li->u.ulog.copy_len;
- loginfo.qthreshold = li->u.ulog.qthreshold;
- strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
- }
-
- ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
-}
-
-static unsigned int
-ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- struct net *net = dev_net(par->in ? par->in : par->out);
-
- ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
- par->targinfo, NULL);
- return EBT_CONTINUE;
-}
-
-static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
-{
- struct ebt_ulog_info *uloginfo = par->targinfo;
-
- if (!par->net->xt.ebt_ulog_warn_deprecated) {
- pr_info("ebt_ulog is deprecated and it will be removed soon, "
- "use ebt_nflog instead\n");
- par->net->xt.ebt_ulog_warn_deprecated = true;
- }
-
- if (uloginfo->nlgroup > 31)
- return -EINVAL;
-
- uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0';
-
- if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN)
- uloginfo->qthreshold = EBT_ULOG_MAX_QLEN;
-
- return 0;
-}
-
-static struct xt_target ebt_ulog_tg_reg __read_mostly = {
- .name = "ulog",
- .revision = 0,
- .family = NFPROTO_BRIDGE,
- .target = ebt_ulog_tg,
- .checkentry = ebt_ulog_tg_check,
- .targetsize = sizeof(struct ebt_ulog_info),
- .me = THIS_MODULE,
-};
-
-static struct nf_logger ebt_ulog_logger __read_mostly = {
- .name = "ebt_ulog",
- .logfn = &ebt_log_packet,
- .me = THIS_MODULE,
-};
-
-static int __net_init ebt_ulog_net_init(struct net *net)
-{
- int i;
- struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
-
- struct netlink_kernel_cfg cfg = {
- .groups = EBT_ULOG_MAXNLGROUPS,
- };
-
- /* initialize ulog_buffers */
- for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- ebt->nlgroup[i] = i;
- setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
- (unsigned long)&ebt->nlgroup[i]);
- spin_lock_init(&ebt->ulog_buffers[i].lock);
- }
-
- ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
- if (!ebt->ebtulognl)
- return -ENOMEM;
-
- nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
- return 0;
-}
-
-static void __net_exit ebt_ulog_net_fini(struct net *net)
-{
- int i;
- struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
-
- nf_log_unset(net, &ebt_ulog_logger);
- for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
- del_timer(&ub->timer);
-
- if (ub->skb) {
- kfree_skb(ub->skb);
- ub->skb = NULL;
- }
- }
- netlink_kernel_release(ebt->ebtulognl);
-}
-
-static struct pernet_operations ebt_ulog_net_ops = {
- .init = ebt_ulog_net_init,
- .exit = ebt_ulog_net_fini,
- .id = &ebt_ulog_net_id,
- .size = sizeof(struct ebt_ulog_net),
-};
-
-static int __init ebt_ulog_init(void)
-{
- int ret;
-
- if (nlbufsiz >= 128*1024) {
- pr_warn("Netlink buffer has to be <= 128kB,"
- "please try a smaller nlbufsiz parameter.\n");
- return -EINVAL;
- }
-
- ret = register_pernet_subsys(&ebt_ulog_net_ops);
- if (ret)
- goto out_pernet;
-
- ret = xt_register_target(&ebt_ulog_tg_reg);
- if (ret)
- goto out_target;
-
- nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
-
- return 0;
-
-out_target:
- unregister_pernet_subsys(&ebt_ulog_net_ops);
-out_pernet:
- return ret;
-}
-
-static void __exit ebt_ulog_fini(void)
-{
- nf_log_unregister(&ebt_ulog_logger);
- xt_unregister_target(&ebt_ulog_tg_reg);
- unregister_pernet_subsys(&ebt_ulog_net_ops);
-}
-
-module_init(ebt_ulog_init);
-module_exit(ebt_ulog_fini);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("Ebtables: Packet logging to netlink using ULOG");
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 1059ed3bc255..d9a8c05d995d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -26,6 +26,7 @@
#include <asm/uaccess.h>
#include <linux/smp.h>
#include <linux/cpumask.h>
+#include <linux/audit.h>
#include <net/sock.h>
/* needed for logical [in,out]-dev filtering */
#include "../br_private.h"
@@ -327,10 +328,7 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
char name[EBT_FUNCTION_MAXNAMELEN];
} *e;
- *error = mutex_lock_interruptible(mutex);
- if (*error != 0)
- return NULL;
-
+ mutex_lock(mutex);
list_for_each_entry(e, head, list) {
if (strcmp(e->name, name) == 0)
return e;
@@ -1061,6 +1059,20 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
vfree(table);
vfree(counterstmp);
+
+#ifdef CONFIG_AUDIT
+ if (audit_enabled) {
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG);
+ if (ab) {
+ audit_log_format(ab, "table=%s family=%u entries=%u",
+ repl->name, AF_BRIDGE, repl->nentries);
+ audit_log_end(ab);
+ }
+ }
+#endif
return ret;
free_unlock:
@@ -1203,10 +1215,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table)
table->private = newinfo;
rwlock_init(&table->lock);
- ret = mutex_lock_interruptible(&ebt_mutex);
- if (ret != 0)
- goto free_chainstack;
-
+ mutex_lock(&ebt_mutex);
list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) {
if (strcmp(t->name, table->name) == 0) {
ret = -EEXIST;
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
new file mode 100644
index 000000000000..5d9953a90929
--- /dev/null
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -0,0 +1,96 @@
+/*
+ * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_bridge.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_log.h>
+
+static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ case htons(ETH_P_IPV6):
+ nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP):
+ nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
+ loginfo, "%s", prefix);
+ break;
+ }
+}
+
+static struct nf_logger nf_bridge_logger __read_mostly = {
+ .name = "nf_log_bridge",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_bridge_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_bridge_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_bridge_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_bridge_logger);
+}
+
+static struct pernet_operations nf_log_bridge_net_ops = {
+ .init = nf_log_bridge_net_init,
+ .exit = nf_log_bridge_net_exit,
+};
+
+static int __init nf_log_bridge_init(void)
+{
+ int ret;
+
+ /* Request to load the real packet loggers. */
+ nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG);
+ nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG);
+ nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG);
+
+ ret = register_pernet_subsys(&nf_log_bridge_net_ops);
+ if (ret < 0)
+ return ret;
+
+ nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger);
+ return 0;
+}
+
+static void __exit nf_log_bridge_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_bridge_net_ops);
+ nf_log_unregister(&nf_bridge_logger);
+}
+
+module_init(nf_log_bridge_init);
+module_exit(nf_log_bridge_exit);
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter bridge packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0);
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 5bcc0d8b31f2..da17a5eab8b4 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -34,9 +34,11 @@ static struct nft_af_info nft_af_bridge __read_mostly = {
.owner = THIS_MODULE,
.nops = 1,
.hooks = {
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
[NF_BR_LOCAL_IN] = nft_do_chain_bridge,
[NF_BR_FORWARD] = nft_do_chain_bridge,
[NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
+ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
},
};
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
new file mode 100644
index 000000000000..a76479535df2
--- /dev/null
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2014 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+static void nft_reject_bridge_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+ switch (eth_hdr(pkt->skb)->h_proto) {
+ case htons(ETH_P_IP):
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach(pkt->skb,
+ nft_reject_icmp_code(priv->icmp_code));
+ break;
+ }
+ break;
+ case htons(ETH_P_IPV6):
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach6(net, pkt->skb,
+ nft_reject_icmpv6_code(priv->icmp_code),
+ pkt->ops->hooknum);
+ break;
+ }
+ break;
+ default:
+ /* No explicit way to reject this protocol, drop it. */
+ break;
+ }
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static int nft_reject_bridge_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ int icmp_code;
+
+ if (tb[NFTA_REJECT_TYPE] == NULL)
+ return -EINVAL;
+
+ priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+ return -EINVAL;
+
+ icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+ if (priv->type == NFT_REJECT_ICMPX_UNREACH &&
+ icmp_code > NFT_REJECT_ICMPX_MAX)
+ return -EINVAL;
+
+ priv->icmp_code = icmp_code;
+ break;
+ case NFT_REJECT_TCP_RST:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int nft_reject_bridge_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_reject *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+ goto nla_put_failure;
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+ goto nla_put_failure;
+ break;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_reject_bridge_type;
+static const struct nft_expr_ops nft_reject_bridge_ops = {
+ .type = &nft_reject_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_bridge_eval,
+ .init = nft_reject_bridge_init,
+ .dump = nft_reject_bridge_dump,
+};
+
+static struct nft_expr_type nft_reject_bridge_type __read_mostly = {
+ .family = NFPROTO_BRIDGE,
+ .name = "reject",
+ .ops = &nft_reject_bridge_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_bridge_module_init(void)
+{
+ return nft_register_expr(&nft_reject_bridge_type);
+}
+
+static void __exit nft_reject_bridge_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_bridge_type);
+}
+
+module_init(nft_reject_bridge_module_init);
+module_exit(nft_reject_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "reject");
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index e8437094d15f..43f750e88e19 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -908,8 +908,7 @@ static int caif_release(struct socket *sock)
sock->sk = NULL;
WARN_ON(IS_ERR(cf_sk->debugfs_socket_dir));
- if (cf_sk->debugfs_socket_dir != NULL)
- debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
+ debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
lock_sock(&(cf_sk->sk));
sk->sk_state = CAIF_DISCONNECTED;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 0f455227da83..f5afda1abc76 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -547,7 +547,6 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
default:
pr_err("Unrecognized Control Frame\n");
goto error;
- break;
}
ret = 0;
error:
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 96238ba95f2b..de6662b14e1f 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -13,8 +13,6 @@
#include "auth_x.h"
#include "auth_x_protocol.h"
-#define TEMP_TICKET_BUF_LEN 256
-
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
@@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret,
}
static int ceph_x_decrypt(struct ceph_crypto_key *secret,
- void **p, void *end, void *obuf, size_t olen)
+ void **p, void *end, void **obuf, size_t olen)
{
struct ceph_x_encrypt_header head;
size_t head_len = sizeof(head);
@@ -75,8 +73,14 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret,
return -EINVAL;
dout("ceph_x_decrypt len %d\n", len);
- ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen,
- *p, len);
+ if (*obuf == NULL) {
+ *obuf = kmalloc(len, GFP_NOFS);
+ if (!*obuf)
+ return -ENOMEM;
+ olen = len;
+ }
+
+ ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len);
if (ret)
return ret;
if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
@@ -129,139 +133,120 @@ static void remove_ticket_handler(struct ceph_auth_client *ac,
kfree(th);
}
-static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
- struct ceph_crypto_key *secret,
- void *buf, void *end)
+static int process_one_ticket(struct ceph_auth_client *ac,
+ struct ceph_crypto_key *secret,
+ void **p, void *end)
{
struct ceph_x_info *xi = ac->private;
- int num;
- void *p = buf;
+ int type;
+ u8 tkt_struct_v, blob_struct_v;
+ struct ceph_x_ticket_handler *th;
+ void *dbuf = NULL;
+ void *dp, *dend;
+ int dlen;
+ char is_enc;
+ struct timespec validity;
+ struct ceph_crypto_key old_key;
+ void *ticket_buf = NULL;
+ void *tp, *tpend;
+ struct ceph_timespec new_validity;
+ struct ceph_crypto_key new_session_key;
+ struct ceph_buffer *new_ticket_blob;
+ unsigned long new_expires, new_renew_after;
+ u64 new_secret_id;
int ret;
- char *dbuf;
- char *ticket_buf;
- u8 reply_struct_v;
- dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
- if (!dbuf)
- return -ENOMEM;
+ ceph_decode_need(p, end, sizeof(u32) + 1, bad);
- ret = -ENOMEM;
- ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
- if (!ticket_buf)
- goto out_dbuf;
+ type = ceph_decode_32(p);
+ dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
- ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
- reply_struct_v = ceph_decode_8(&p);
- if (reply_struct_v != 1)
+ tkt_struct_v = ceph_decode_8(p);
+ if (tkt_struct_v != 1)
goto bad;
- num = ceph_decode_32(&p);
- dout("%d tickets\n", num);
- while (num--) {
- int type;
- u8 tkt_struct_v, blob_struct_v;
- struct ceph_x_ticket_handler *th;
- void *dp, *dend;
- int dlen;
- char is_enc;
- struct timespec validity;
- struct ceph_crypto_key old_key;
- void *tp, *tpend;
- struct ceph_timespec new_validity;
- struct ceph_crypto_key new_session_key;
- struct ceph_buffer *new_ticket_blob;
- unsigned long new_expires, new_renew_after;
- u64 new_secret_id;
-
- ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
-
- type = ceph_decode_32(&p);
- dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
-
- tkt_struct_v = ceph_decode_8(&p);
- if (tkt_struct_v != 1)
- goto bad;
-
- th = get_ticket_handler(ac, type);
- if (IS_ERR(th)) {
- ret = PTR_ERR(th);
- goto out;
- }
- /* blob for me */
- dlen = ceph_x_decrypt(secret, &p, end, dbuf,
- TEMP_TICKET_BUF_LEN);
- if (dlen <= 0) {
- ret = dlen;
- goto out;
- }
- dout(" decrypted %d bytes\n", dlen);
- dend = dbuf + dlen;
- dp = dbuf;
+ th = get_ticket_handler(ac, type);
+ if (IS_ERR(th)) {
+ ret = PTR_ERR(th);
+ goto out;
+ }
- tkt_struct_v = ceph_decode_8(&dp);
- if (tkt_struct_v != 1)
- goto bad;
+ /* blob for me */
+ dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0);
+ if (dlen <= 0) {
+ ret = dlen;
+ goto out;
+ }
+ dout(" decrypted %d bytes\n", dlen);
+ dp = dbuf;
+ dend = dp + dlen;
- memcpy(&old_key, &th->session_key, sizeof(old_key));
- ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
- if (ret)
- goto out;
+ tkt_struct_v = ceph_decode_8(&dp);
+ if (tkt_struct_v != 1)
+ goto bad;
- ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
- ceph_decode_timespec(&validity, &new_validity);
- new_expires = get_seconds() + validity.tv_sec;
- new_renew_after = new_expires - (validity.tv_sec / 4);
- dout(" expires=%lu renew_after=%lu\n", new_expires,
- new_renew_after);
+ memcpy(&old_key, &th->session_key, sizeof(old_key));
+ ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
+ if (ret)
+ goto out;
- /* ticket blob for service */
- ceph_decode_8_safe(&p, end, is_enc, bad);
- tp = ticket_buf;
- if (is_enc) {
- /* encrypted */
- dout(" encrypted ticket\n");
- dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf,
- TEMP_TICKET_BUF_LEN);
- if (dlen < 0) {
- ret = dlen;
- goto out;
- }
- dlen = ceph_decode_32(&tp);
- } else {
- /* unencrypted */
- ceph_decode_32_safe(&p, end, dlen, bad);
- ceph_decode_need(&p, end, dlen, bad);
- ceph_decode_copy(&p, ticket_buf, dlen);
+ ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+ ceph_decode_timespec(&validity, &new_validity);
+ new_expires = get_seconds() + validity.tv_sec;
+ new_renew_after = new_expires - (validity.tv_sec / 4);
+ dout(" expires=%lu renew_after=%lu\n", new_expires,
+ new_renew_after);
+
+ /* ticket blob for service */
+ ceph_decode_8_safe(p, end, is_enc, bad);
+ if (is_enc) {
+ /* encrypted */
+ dout(" encrypted ticket\n");
+ dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0);
+ if (dlen < 0) {
+ ret = dlen;
+ goto out;
}
- tpend = tp + dlen;
- dout(" ticket blob is %d bytes\n", dlen);
- ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
- blob_struct_v = ceph_decode_8(&tp);
- new_secret_id = ceph_decode_64(&tp);
- ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
- if (ret)
+ tp = ticket_buf;
+ dlen = ceph_decode_32(&tp);
+ } else {
+ /* unencrypted */
+ ceph_decode_32_safe(p, end, dlen, bad);
+ ticket_buf = kmalloc(dlen, GFP_NOFS);
+ if (!ticket_buf) {
+ ret = -ENOMEM;
goto out;
-
- /* all is well, update our ticket */
- ceph_crypto_key_destroy(&th->session_key);
- if (th->ticket_blob)
- ceph_buffer_put(th->ticket_blob);
- th->session_key = new_session_key;
- th->ticket_blob = new_ticket_blob;
- th->validity = new_validity;
- th->secret_id = new_secret_id;
- th->expires = new_expires;
- th->renew_after = new_renew_after;
- dout(" got ticket service %d (%s) secret_id %lld len %d\n",
- type, ceph_entity_type_name(type), th->secret_id,
- (int)th->ticket_blob->vec.iov_len);
- xi->have_keys |= th->service;
+ }
+ tp = ticket_buf;
+ ceph_decode_need(p, end, dlen, bad);
+ ceph_decode_copy(p, ticket_buf, dlen);
}
+ tpend = tp + dlen;
+ dout(" ticket blob is %d bytes\n", dlen);
+ ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
+ blob_struct_v = ceph_decode_8(&tp);
+ new_secret_id = ceph_decode_64(&tp);
+ ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+ if (ret)
+ goto out;
+
+ /* all is well, update our ticket */
+ ceph_crypto_key_destroy(&th->session_key);
+ if (th->ticket_blob)
+ ceph_buffer_put(th->ticket_blob);
+ th->session_key = new_session_key;
+ th->ticket_blob = new_ticket_blob;
+ th->validity = new_validity;
+ th->secret_id = new_secret_id;
+ th->expires = new_expires;
+ th->renew_after = new_renew_after;
+ dout(" got ticket service %d (%s) secret_id %lld len %d\n",
+ type, ceph_entity_type_name(type), th->secret_id,
+ (int)th->ticket_blob->vec.iov_len);
+ xi->have_keys |= th->service;
- ret = 0;
out:
kfree(ticket_buf);
-out_dbuf:
kfree(dbuf);
return ret;
@@ -270,6 +255,34 @@ bad:
goto out;
}
+static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
+ struct ceph_crypto_key *secret,
+ void *buf, void *end)
+{
+ void *p = buf;
+ u8 reply_struct_v;
+ u32 num;
+ int ret;
+
+ ceph_decode_8_safe(&p, end, reply_struct_v, bad);
+ if (reply_struct_v != 1)
+ return -EINVAL;
+
+ ceph_decode_32_safe(&p, end, num, bad);
+ dout("%d tickets\n", num);
+
+ while (num--) {
+ ret = process_one_ticket(ac, secret, &p, end);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+bad:
+ return -EINVAL;
+}
+
static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th,
struct ceph_x_authorizer *au)
@@ -583,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th;
int ret = 0;
struct ceph_x_authorize_reply reply;
+ void *preply = &reply;
void *p = au->reply_buf;
void *end = p + sizeof(au->reply_buf);
th = get_ticket_handler(ac, au->service);
if (IS_ERR(th))
return PTR_ERR(th);
- ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
+ ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
if (ret < 0)
return ret;
if (ret != sizeof(reply))
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1948d592aa54..b2f571dd933d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -174,6 +174,7 @@ static struct lock_class_key socket_class;
#define SKIP_BUF_SIZE 1024
static void queue_con(struct ceph_connection *con);
+static void cancel_con(struct ceph_connection *con);
static void con_work(struct work_struct *);
static void con_fault(struct ceph_connection *con);
@@ -680,7 +681,7 @@ void ceph_con_close(struct ceph_connection *con)
reset_connection(con);
con->peer_global_seq = 0;
- cancel_delayed_work(&con->work);
+ cancel_con(con);
con_close_socket(con);
mutex_unlock(&con->mutex);
}
@@ -900,7 +901,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
BUG_ON(page_count > (int)USHRT_MAX);
cursor->page_count = (unsigned short)page_count;
BUG_ON(length > SIZE_MAX - cursor->page_offset);
- cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
+ cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
}
static struct page *
@@ -2667,19 +2668,16 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
{
if (!con->ops->get(con)) {
dout("%s %p ref count 0\n", __func__, con);
-
return -ENOENT;
}
if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
dout("%s %p - already queued\n", __func__, con);
con->ops->put(con);
-
return -EBUSY;
}
dout("%s %p %lu\n", __func__, con, delay);
-
return 0;
}
@@ -2688,6 +2686,14 @@ static void queue_con(struct ceph_connection *con)
(void) queue_con_delay(con, 0);
}
+static void cancel_con(struct ceph_connection *con)
+{
+ if (cancel_delayed_work(&con->work)) {
+ dout("%s %p\n", __func__, con);
+ con->ops->put(con);
+ }
+}
+
static bool con_sock_closed(struct ceph_connection *con)
{
if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
@@ -3269,24 +3275,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
/*
* Free a generically kmalloc'd message.
*/
-void ceph_msg_kfree(struct ceph_msg *m)
+static void ceph_msg_free(struct ceph_msg *m)
{
- dout("msg_kfree %p\n", m);
+ dout("%s %p\n", __func__, m);
ceph_kvfree(m->front.iov_base);
kmem_cache_free(ceph_msg_cache, m);
}
-/*
- * Drop a msg ref. Destroy as needed.
- */
-void ceph_msg_last_put(struct kref *kref)
+static void ceph_msg_release(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
LIST_HEAD(data);
struct list_head *links;
struct list_head *next;
- dout("ceph_msg_put last one on %p\n", m);
+ dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
/* drop middle, data, if any */
@@ -3308,9 +3311,25 @@ void ceph_msg_last_put(struct kref *kref)
if (m->pool)
ceph_msgpool_put(m->pool, m);
else
- ceph_msg_kfree(m);
+ ceph_msg_free(m);
+}
+
+struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
+{
+ dout("%s %p (was %d)\n", __func__, msg,
+ atomic_read(&msg->kref.refcount));
+ kref_get(&msg->kref);
+ return msg;
+}
+EXPORT_SYMBOL(ceph_msg_get);
+
+void ceph_msg_put(struct ceph_msg *msg)
+{
+ dout("%s %p (was %d)\n", __func__, msg,
+ atomic_read(&msg->kref.refcount));
+ kref_put(&msg->kref, ceph_msg_release);
}
-EXPORT_SYMBOL(ceph_msg_last_put);
+EXPORT_SYMBOL(ceph_msg_put);
void ceph_msg_dump(struct ceph_msg *msg)
{
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 067d3af2eaf6..61fcfc304f68 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1181,7 +1181,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
if (!m) {
pr_info("alloc_msg unknown type %d\n", type);
*skip = 1;
+ } else if (front_len > m->front_alloc_len) {
+ pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n",
+ front_len, m->front_alloc_len,
+ (unsigned int)con->peer_name.type,
+ le64_to_cpu(con->peer_name.num));
+ ceph_msg_put(m);
+ m = ceph_msg_new(type, front_len, GFP_NOFS, false);
}
+
return m;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 05be0c181695..30f6faf3584f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -297,12 +297,21 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
/*
* requests
*/
-void ceph_osdc_release_request(struct kref *kref)
+static void ceph_osdc_release_request(struct kref *kref)
{
- struct ceph_osd_request *req;
+ struct ceph_osd_request *req = container_of(kref,
+ struct ceph_osd_request, r_kref);
unsigned int which;
- req = container_of(kref, struct ceph_osd_request, r_kref);
+ dout("%s %p (r_request %p r_reply %p)\n", __func__, req,
+ req->r_request, req->r_reply);
+ WARN_ON(!RB_EMPTY_NODE(&req->r_node));
+ WARN_ON(!list_empty(&req->r_req_lru_item));
+ WARN_ON(!list_empty(&req->r_osd_item));
+ WARN_ON(!list_empty(&req->r_linger_item));
+ WARN_ON(!list_empty(&req->r_linger_osd_item));
+ WARN_ON(req->r_osd);
+
if (req->r_request)
ceph_msg_put(req->r_request);
if (req->r_reply) {
@@ -320,7 +329,22 @@ void ceph_osdc_release_request(struct kref *kref)
kmem_cache_free(ceph_osd_request_cache, req);
}
-EXPORT_SYMBOL(ceph_osdc_release_request);
+
+void ceph_osdc_get_request(struct ceph_osd_request *req)
+{
+ dout("%s %p (was %d)\n", __func__, req,
+ atomic_read(&req->r_kref.refcount));
+ kref_get(&req->r_kref);
+}
+EXPORT_SYMBOL(ceph_osdc_get_request);
+
+void ceph_osdc_put_request(struct ceph_osd_request *req)
+{
+ dout("%s %p (was %d)\n", __func__, req,
+ atomic_read(&req->r_kref.refcount));
+ kref_put(&req->r_kref, ceph_osdc_release_request);
+}
+EXPORT_SYMBOL(ceph_osdc_put_request);
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
@@ -364,7 +388,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
RB_CLEAR_NODE(&req->r_node);
INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item);
- INIT_LIST_HEAD(&req->r_linger_osd);
+ INIT_LIST_HEAD(&req->r_linger_osd_item);
INIT_LIST_HEAD(&req->r_req_lru_item);
INIT_LIST_HEAD(&req->r_osd_item);
@@ -916,7 +940,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
* list at the end to keep things in tid order.
*/
list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
- r_linger_osd) {
+ r_linger_osd_item) {
/*
* reregister request prior to unregistering linger so
* that r_osd is preserved.
@@ -1008,6 +1032,8 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
{
dout("__remove_osd %p\n", osd);
BUG_ON(!list_empty(&osd->o_requests));
+ BUG_ON(!list_empty(&osd->o_linger_requests));
+
rb_erase(&osd->o_node, &osdc->osds);
list_del_init(&osd->o_osd_lru);
ceph_con_close(&osd->o_con);
@@ -1029,12 +1055,23 @@ static void remove_all_osds(struct ceph_osd_client *osdc)
static void __move_osd_to_lru(struct ceph_osd_client *osdc,
struct ceph_osd *osd)
{
- dout("__move_osd_to_lru %p\n", osd);
+ dout("%s %p\n", __func__, osd);
BUG_ON(!list_empty(&osd->o_osd_lru));
+
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
}
+static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
+ struct ceph_osd *osd)
+{
+ dout("%s %p\n", __func__, osd);
+
+ if (list_empty(&osd->o_requests) &&
+ list_empty(&osd->o_linger_requests))
+ __move_osd_to_lru(osdc, osd);
+}
+
static void __remove_osd_from_lru(struct ceph_osd *osd)
{
dout("__remove_osd_from_lru %p\n", osd);
@@ -1175,6 +1212,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
rb_erase(&req->r_node, &osdc->requests);
+ RB_CLEAR_NODE(&req->r_node);
osdc->num_requests--;
if (req->r_osd) {
@@ -1182,12 +1220,8 @@ static void __unregister_request(struct ceph_osd_client *osdc,
ceph_msg_revoke(req->r_request);
list_del_init(&req->r_osd_item);
- if (list_empty(&req->r_osd->o_requests) &&
- list_empty(&req->r_osd->o_linger_requests)) {
- dout("moving osd to %p lru\n", req->r_osd);
- __move_osd_to_lru(osdc, req->r_osd);
- }
- if (list_empty(&req->r_linger_item))
+ maybe_move_osd_to_lru(osdc, req->r_osd);
+ if (list_empty(&req->r_linger_osd_item))
req->r_osd = NULL;
}
@@ -1214,45 +1248,39 @@ static void __cancel_request(struct ceph_osd_request *req)
static void __register_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
- dout("__register_linger_request %p\n", req);
+ dout("%s %p tid %llu\n", __func__, req, req->r_tid);
+ WARN_ON(!req->r_linger);
+
ceph_osdc_get_request(req);
list_add_tail(&req->r_linger_item, &osdc->req_linger);
if (req->r_osd)
- list_add_tail(&req->r_linger_osd,
+ list_add_tail(&req->r_linger_osd_item,
&req->r_osd->o_linger_requests);
}
static void __unregister_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
- dout("__unregister_linger_request %p\n", req);
+ WARN_ON(!req->r_linger);
+
+ if (list_empty(&req->r_linger_item)) {
+ dout("%s %p tid %llu not registered\n", __func__, req,
+ req->r_tid);
+ return;
+ }
+
+ dout("%s %p tid %llu\n", __func__, req, req->r_tid);
list_del_init(&req->r_linger_item);
- if (req->r_osd) {
- list_del_init(&req->r_linger_osd);
- if (list_empty(&req->r_osd->o_requests) &&
- list_empty(&req->r_osd->o_linger_requests)) {
- dout("moving osd to %p lru\n", req->r_osd);
- __move_osd_to_lru(osdc, req->r_osd);
- }
+ if (req->r_osd) {
+ list_del_init(&req->r_linger_osd_item);
+ maybe_move_osd_to_lru(osdc, req->r_osd);
if (list_empty(&req->r_osd_item))
req->r_osd = NULL;
}
ceph_osdc_put_request(req);
}
-void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req)
-{
- mutex_lock(&osdc->request_mutex);
- if (req->r_linger) {
- req->r_linger = 0;
- __unregister_linger_request(osdc, req);
- }
- mutex_unlock(&osdc->request_mutex);
-}
-EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
-
void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
@@ -2430,6 +2458,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
+ * Unregister a registered request. The request is not completed (i.e.
+ * no callbacks or wakeups) - higher layers are supposed to know what
+ * they are canceling.
+ */
+void ceph_osdc_cancel_request(struct ceph_osd_request *req)
+{
+ struct ceph_osd_client *osdc = req->r_osdc;
+
+ mutex_lock(&osdc->request_mutex);
+ if (req->r_linger)
+ __unregister_linger_request(osdc, req);
+ __unregister_request(osdc, req);
+ mutex_unlock(&osdc->request_mutex);
+
+ dout("%s %p tid %llu canceled\n", __func__, req, req->r_tid);
+}
+EXPORT_SYMBOL(ceph_osdc_cancel_request);
+
+/*
* wait for a request to complete
*/
int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
@@ -2437,18 +2484,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
{
int rc;
+ dout("%s %p tid %llu\n", __func__, req, req->r_tid);
+
rc = wait_for_completion_interruptible(&req->r_completion);
if (rc < 0) {
- mutex_lock(&osdc->request_mutex);
- __cancel_request(req);
- __unregister_request(osdc, req);
- mutex_unlock(&osdc->request_mutex);
+ dout("%s %p tid %llu interrupted\n", __func__, req, req->r_tid);
+ ceph_osdc_cancel_request(req);
complete_request(req);
- dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
return rc;
}
- dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
+ dout("%s %p tid %llu result %d\n", __func__, req, req->r_tid,
+ req->r_result);
return req->r_result;
}
EXPORT_SYMBOL(ceph_osdc_wait_request);
diff --git a/net/core/Makefile b/net/core/Makefile
index 71093d94ad2b..235e6c50708d 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,7 +16,6 @@ obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
-obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 488dd1a825c0..fdbc9a81d4c2 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -775,7 +775,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
EXPORT_SYMBOL(__skb_checksum_complete);
/**
- * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
+ * skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
* @skb: skbuff
* @hlen: hardware length
* @iov: io vector
diff --git a/net/core/dev.c b/net/core/dev.c
index 367a586d0c8a..4699dcfdc4ab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
+#include <linux/errqueue.h>
#include "net-sysfs.h"
@@ -896,23 +897,25 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
EXPORT_SYMBOL(dev_getfirstbyhwtype);
/**
- * dev_get_by_flags_rcu - find any device with given flags
+ * __dev_get_by_flags - find any device with given flags
* @net: the applicable net namespace
* @if_flags: IFF_* values
* @mask: bitmask of bits in if_flags to check
*
* Search for any interface with the given flags. Returns NULL if a device
* is not found or a pointer to the device. Must be called inside
- * rcu_read_lock(), and result refcount is unchanged.
+ * rtnl_lock(), and result refcount is unchanged.
*/
-struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
- unsigned short mask)
+struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
+ unsigned short mask)
{
struct net_device *dev, *ret;
+ ASSERT_RTNL();
+
ret = NULL;
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
ret = dev;
break;
@@ -920,7 +923,7 @@ struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags
}
return ret;
}
-EXPORT_SYMBOL(dev_get_by_flags_rcu);
+EXPORT_SYMBOL(__dev_get_by_flags);
/**
* dev_valid_name - check if name is okay for network device
@@ -1085,6 +1088,7 @@ static int dev_get_valid_name(struct net *net,
*/
int dev_change_name(struct net_device *dev, const char *newname)
{
+ unsigned char old_assign_type;
char oldname[IFNAMSIZ];
int err = 0;
int ret;
@@ -1112,10 +1116,17 @@ int dev_change_name(struct net_device *dev, const char *newname)
return err;
}
+ if (oldname[0] && !strchr(oldname, '%'))
+ netdev_info(dev, "renamed from %s\n", oldname);
+
+ old_assign_type = dev->name_assign_type;
+ dev->name_assign_type = NET_NAME_RENAMED;
+
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
write_seqcount_end(&devnet_rename_seq);
return ret;
}
@@ -1144,6 +1155,8 @@ rollback:
write_seqcount_begin(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
+ old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
pr_err("%s: name change rollback failed: %d\n",
@@ -1273,7 +1286,6 @@ static int __dev_open(struct net_device *dev)
clear_bit(__LINK_STATE_START, &dev->state);
else {
dev->flags |= IFF_UP;
- net_dmaengine_get();
dev_set_rx_mode(dev);
dev_activate(dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
@@ -1352,7 +1364,6 @@ static int __dev_close_many(struct list_head *head)
ops->ndo_stop(dev);
dev->flags &= ~IFF_UP;
- net_dmaengine_put();
netpoll_poll_enable(dev);
}
@@ -2166,6 +2177,53 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
return (struct dev_kfree_skb_cb *)skb->cb;
}
+void netif_schedule_queue(struct netdev_queue *txq)
+{
+ rcu_read_lock();
+ if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
+ struct Qdisc *q = rcu_dereference(txq->qdisc);
+
+ __netif_schedule(q);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(netif_schedule_queue);
+
+/**
+ * netif_wake_subqueue - allow sending packets on subqueue
+ * @dev: network device
+ * @queue_index: sub queue index
+ *
+ * Resume individual transmit queue of a device with multiple transmit queues.
+ */
+void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
+
+ if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
+ struct Qdisc *q;
+
+ rcu_read_lock();
+ q = rcu_dereference(txq->qdisc);
+ __netif_schedule(q);
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL(netif_wake_subqueue);
+
+void netif_tx_wake_queue(struct netdev_queue *dev_queue)
+{
+ if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
+ struct Qdisc *q;
+
+ rcu_read_lock();
+ q = rcu_dereference(dev_queue->qdisc);
+ __netif_schedule(q);
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL(netif_tx_wake_queue);
+
void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
{
unsigned long flags;
@@ -2316,7 +2374,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
*/
if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
if (vlan_depth) {
- if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN)))
+ if (WARN_ON(vlan_depth < VLAN_HLEN))
return 0;
vlan_depth -= VLAN_HLEN;
} else {
@@ -2362,16 +2420,6 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
rcu_read_lock();
list_for_each_entry_rcu(ptype, &offload_base, list) {
if (ptype->type == type && ptype->callbacks.gso_segment) {
- if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
- int err;
-
- err = ptype->callbacks.gso_send_check(skb);
- segs = ERR_PTR(err);
- if (err || skb_gso_ok(skb, features))
- break;
- __skb_push(skb, (skb->data -
- skb_network_header(skb)));
- }
segs = ptype->callbacks.gso_segment(skb, features);
break;
}
@@ -2414,8 +2462,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
skb_warn_bad_offload(skb);
- if (skb_header_cloned(skb) &&
- (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
return ERR_PTR(err);
}
@@ -2474,52 +2522,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
return 0;
}
-struct dev_gso_cb {
- void (*destructor)(struct sk_buff *skb);
-};
-
-#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
-
-static void dev_gso_skb_destructor(struct sk_buff *skb)
-{
- struct dev_gso_cb *cb;
-
- kfree_skb_list(skb->next);
- skb->next = NULL;
-
- cb = DEV_GSO_CB(skb);
- if (cb->destructor)
- cb->destructor(skb);
-}
-
-/**
- * dev_gso_segment - Perform emulated hardware segmentation on skb.
- * @skb: buffer to segment
- * @features: device features as applicable to this skb
- *
- * This function segments the given skb and stores the list of segments
- * in skb->next.
- */
-static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
-{
- struct sk_buff *segs;
-
- segs = skb_gso_segment(skb, features);
-
- /* Verifying header integrity only. */
- if (!segs)
- return 0;
-
- if (IS_ERR(segs))
- return PTR_ERR(segs);
-
- skb->next = segs;
- DEV_GSO_CB(skb)->destructor = skb->destructor;
- skb->destructor = dev_gso_skb_destructor;
-
- return 0;
-}
-
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
@@ -2563,10 +2565,12 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
+ const struct net_device *dev = skb->dev;
+ netdev_features_t features = dev->features;
+ u16 gso_segs = skb_shinfo(skb)->gso_segs;
__be16 protocol = skb->protocol;
- netdev_features_t features = skb->dev->features;
- if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+ if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
@@ -2576,131 +2580,167 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
return harmonize_features(skb, features);
}
- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX);
+ features = netdev_intersect_features(features,
+ dev->vlan_features |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
- features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
- NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX;
+ features = netdev_intersect_features(features,
+ NETIF_F_SG |
+ NETIF_F_HIGHDMA |
+ NETIF_F_FRAGLIST |
+ NETIF_F_GEN_CSUM |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
- struct netdev_queue *txq)
+static int xmit_one(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq, bool more)
{
- const struct net_device_ops *ops = dev->netdev_ops;
- int rc = NETDEV_TX_OK;
- unsigned int skb_len;
-
- if (likely(!skb->next)) {
- netdev_features_t features;
+ unsigned int len;
+ int rc;
- /*
- * If device doesn't need skb->dst, release it right now while
- * its hot in this cpu cache
- */
- if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
- skb_dst_drop(skb);
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(skb, dev);
- features = netif_skb_features(skb);
+ len = skb->len;
+ trace_net_dev_start_xmit(skb, dev);
+ rc = netdev_start_xmit(skb, dev, txq, more);
+ trace_net_dev_xmit(skb, rc, dev, len);
- if (vlan_tx_tag_present(skb) &&
- !vlan_hw_offload_capable(features, skb->vlan_proto)) {
- skb = __vlan_put_tag(skb, skb->vlan_proto,
- vlan_tx_tag_get(skb));
- if (unlikely(!skb))
- goto out;
+ return rc;
+}
- skb->vlan_tci = 0;
- }
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
+ struct netdev_queue *txq, int *ret)
+{
+ struct sk_buff *skb = first;
+ int rc = NETDEV_TX_OK;
- /* If encapsulation offload request, verify we are testing
- * hardware encapsulation features instead of standard
- * features for the netdev
- */
- if (skb->encapsulation)
- features &= dev->hw_enc_features;
+ while (skb) {
+ struct sk_buff *next = skb->next;
- if (netif_needs_gso(skb, features)) {
- if (unlikely(dev_gso_segment(skb, features)))
- goto out_kfree_skb;
- if (skb->next)
- goto gso;
- } else {
- if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb))
- goto out_kfree_skb;
+ skb->next = NULL;
+ rc = xmit_one(skb, dev, txq, next != NULL);
+ if (unlikely(!dev_xmit_complete(rc))) {
+ skb->next = next;
+ goto out;
+ }
- /* If packet is not checksummed and device does not
- * support checksumming for this protocol, complete
- * checksumming here.
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb->encapsulation)
- skb_set_inner_transport_header(skb,
- skb_checksum_start_offset(skb));
- else
- skb_set_transport_header(skb,
- skb_checksum_start_offset(skb));
- if (!(features & NETIF_F_ALL_CSUM) &&
- skb_checksum_help(skb))
- goto out_kfree_skb;
- }
+ skb = next;
+ if (netif_xmit_stopped(txq) && skb) {
+ rc = NETDEV_TX_BUSY;
+ break;
}
+ }
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(skb, dev);
+out:
+ *ret = rc;
+ return skb;
+}
- skb_len = skb->len;
- trace_net_dev_start_xmit(skb, dev);
- rc = ops->ndo_start_xmit(skb, dev);
- trace_net_dev_xmit(skb, rc, dev, skb_len);
- if (rc == NETDEV_TX_OK)
- txq_trans_update(txq);
- return rc;
+static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (vlan_tx_tag_present(skb) &&
+ !vlan_hw_offload_capable(features, skb->vlan_proto)) {
+ skb = __vlan_put_tag(skb, skb->vlan_proto,
+ vlan_tx_tag_get(skb));
+ if (skb)
+ skb->vlan_tci = 0;
}
+ return skb;
+}
+
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+{
+ netdev_features_t features;
-gso:
- do {
- struct sk_buff *nskb = skb->next;
+ if (skb->next)
+ return skb;
- skb->next = nskb->next;
- nskb->next = NULL;
+ features = netif_skb_features(skb);
+ skb = validate_xmit_vlan(skb, features);
+ if (unlikely(!skb))
+ goto out_null;
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(nskb, dev);
-
- skb_len = nskb->len;
- trace_net_dev_start_xmit(nskb, dev);
- rc = ops->ndo_start_xmit(nskb, dev);
- trace_net_dev_xmit(nskb, rc, dev, skb_len);
- if (unlikely(rc != NETDEV_TX_OK)) {
- if (rc & ~NETDEV_TX_MASK)
- goto out_kfree_gso_skb;
- nskb->next = skb->next;
- skb->next = nskb;
- return rc;
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
+
+ if (netif_needs_gso(skb, features)) {
+ struct sk_buff *segs;
+
+ segs = skb_gso_segment(skb, features);
+ if (IS_ERR(segs)) {
+ segs = NULL;
+ } else if (segs) {
+ consume_skb(skb);
+ skb = segs;
}
- txq_trans_update(txq);
- if (unlikely(netif_xmit_stopped(txq) && skb->next))
- return NETDEV_TX_BUSY;
- } while (skb->next);
+ } else {
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto out_kfree_skb;
-out_kfree_gso_skb:
- if (likely(skb->next == NULL)) {
- skb->destructor = DEV_GSO_CB(skb)->destructor;
- consume_skb(skb);
- return rc;
+ /* If packet is not checksummed and device does not
+ * support checksumming for this protocol, complete
+ * checksumming here.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb->encapsulation)
+ skb_set_inner_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ else
+ skb_set_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ if (!(features & NETIF_F_ALL_CSUM) &&
+ skb_checksum_help(skb))
+ goto out_kfree_skb;
+ }
}
+
+ return skb;
+
out_kfree_skb:
kfree_skb(skb);
-out:
- return rc;
+out_null:
+ return NULL;
+}
+
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+{
+ struct sk_buff *next, *head = NULL, *tail;
+
+ for (; skb != NULL; skb = next) {
+ next = skb->next;
+ skb->next = NULL;
+
+ /* in case skb wont be segmented, point to itself */
+ skb->prev = skb;
+
+ skb = validate_xmit_skb(skb, dev);
+ if (!skb)
+ continue;
+
+ if (!head)
+ head = skb;
+ else
+ tail->next = skb;
+ /* If skb was segmented, skb->prev points to
+ * the last segment. If not, it still contains skb.
+ */
+ tail = skb->prev;
+ }
+ return head;
}
-EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
@@ -2745,8 +2785,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC_STATE_RUNNING owner to get the lock more often
- * and dequeue packets faster.
+ * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
if (unlikely(contended))
@@ -2763,12 +2803,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
* waiting to be sent out; and the qdisc is not running -
* xmit the skb directly.
*/
- if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
- skb_dst_force(skb);
qdisc_bstats_update(q, skb);
- if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+ if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
contended = false;
@@ -2779,7 +2817,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = NET_XMIT_SUCCESS;
} else {
- skb_dst_force(skb);
rc = q->enqueue(skb, q) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
@@ -2866,6 +2903,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
skb_reset_mac_header(skb);
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
+ __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
@@ -2873,6 +2913,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
skb_update_prio(skb);
+ /* If device/qdisc don't need skb->dst, release it right now while
+ * its hot in this cpu cache.
+ */
+ if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+ skb_dst_drop(skb);
+ else
+ skb_dst_force(skb);
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
@@ -2905,11 +2953,15 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
goto recursion_alert;
+ skb = validate_xmit_skb(skb, dev);
+ if (!skb)
+ goto drop;
+
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
- rc = dev_hard_start_xmit(skb, dev, txq);
+ skb = dev_hard_start_xmit(skb, dev, txq, &rc);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
@@ -2930,10 +2982,11 @@ recursion_alert:
}
rc = -ENETDOWN;
+drop:
rcu_read_unlock_bh();
atomic_long_inc(&dev->tx_dropped);
- kfree_skb(skb);
+ kfree_skb_list(skb);
return rc;
out:
rcu_read_unlock_bh();
@@ -3110,8 +3163,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
}
if (map) {
- tcpu = map->cpus[((u64) hash * map->len) >> 32];
-
+ tcpu = map->cpus[reciprocal_scale(hash, map->len)];
if (cpu_online(tcpu)) {
cpu = tcpu;
goto done;
@@ -3447,7 +3499,7 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- q = rxq->qdisc;
+ q = rcu_dereference(rxq->qdisc);
if (q != &noop_qdisc) {
spin_lock(qdisc_lock(q));
if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
@@ -3464,7 +3516,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
{
struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
- if (!rxq || rxq->qdisc == &noop_qdisc)
+ if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
goto out;
if (*pt_prev) {
@@ -3588,7 +3640,7 @@ another_round:
if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
- skb = vlan_untag(skb);
+ skb = skb_vlan_untag(skb);
if (unlikely(!skb))
goto unlock;
}
@@ -3945,11 +3997,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
- if (skb_is_gso(skb) || skb_has_frag_list(skb))
+ if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
goto normal;
gro_list_prepare(napi, skb);
- NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
@@ -3963,6 +4014,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->udp_mark = 0;
+ /* Setup for GRO checksum validation */
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ NAPI_GRO_CB(skb)->csum = skb->csum;
+ NAPI_GRO_CB(skb)->csum_valid = 1;
+ NAPI_GRO_CB(skb)->csum_cnt = 0;
+ break;
+ case CHECKSUM_UNNECESSARY:
+ NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
+ NAPI_GRO_CB(skb)->csum_valid = 0;
+ break;
+ default:
+ NAPI_GRO_CB(skb)->csum_cnt = 0;
+ NAPI_GRO_CB(skb)->csum_valid = 0;
+ }
+
pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
}
@@ -4192,6 +4259,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_gro_frags);
+/* Compute the checksum from gro_offset and return the folded value
+ * after adding in any pseudo checksum.
+ */
+__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
+{
+ __wsum wsum;
+ __sum16 sum;
+
+ wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
+
+ /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
+ sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev);
+ }
+
+ NAPI_GRO_CB(skb)->csum = wsum;
+ NAPI_GRO_CB(skb)->csum_valid = 1;
+
+ return sum;
+}
+EXPORT_SYMBOL(__skb_gro_checksum_complete);
+
/*
* net_rps_action_and_irq_enable sends any pending IPI's for rps.
* Note: called with local irq disabled, but exits with local irq enabled.
@@ -4485,14 +4577,6 @@ static void net_rx_action(struct softirq_action *h)
out:
net_rps_action_and_irq_enable(sd);
-#ifdef CONFIG_NET_DMA
- /*
- * There may not be any more sk_buffs coming right now, so push
- * any pending DMA copies to hardware
- */
- dma_issue_pending_all();
-#endif
-
return;
softnet_break:
@@ -4789,9 +4873,14 @@ static void netdev_adjacent_sysfs_del(struct net_device *dev,
sysfs_remove_link(&(dev->dev.kobj), linkname);
}
-#define netdev_adjacent_is_neigh_list(dev, dev_list) \
- (dev_list == &dev->adj_list.upper || \
- dev_list == &dev->adj_list.lower)
+static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
+ struct net_device *adj_dev,
+ struct list_head *dev_list)
+{
+ return (dev_list == &dev->adj_list.upper ||
+ dev_list == &dev->adj_list.lower) &&
+ net_eq(dev_net(dev), dev_net(adj_dev));
+}
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
@@ -4821,7 +4910,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
pr_debug("dev_hold for %s, because of link added from %s to %s\n",
adj_dev->name, dev->name, adj_dev->name);
- if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
+ if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
if (ret)
goto free_adj;
@@ -4842,7 +4931,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
return 0;
remove_symlinks:
- if (netdev_adjacent_is_neigh_list(dev, dev_list))
+ if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
free_adj:
kfree(adj);
@@ -4875,7 +4964,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
if (adj->master)
sysfs_remove_link(&(dev->dev.kobj), "master");
- if (netdev_adjacent_is_neigh_list(dev, dev_list))
+ if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
list_del_rcu(&adj->list);
@@ -5145,11 +5234,65 @@ void netdev_upper_dev_unlink(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+void netdev_adjacent_add_links(struct net_device *dev)
+{
+ struct netdev_adjacent *iter;
+
+ struct net *net = dev_net(dev);
+
+ list_for_each_entry(iter, &dev->adj_list.upper, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_add(iter->dev, dev,
+ &iter->dev->adj_list.lower);
+ netdev_adjacent_sysfs_add(dev, iter->dev,
+ &dev->adj_list.upper);
+ }
+
+ list_for_each_entry(iter, &dev->adj_list.lower, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_add(iter->dev, dev,
+ &iter->dev->adj_list.upper);
+ netdev_adjacent_sysfs_add(dev, iter->dev,
+ &dev->adj_list.lower);
+ }
+}
+
+void netdev_adjacent_del_links(struct net_device *dev)
+{
+ struct netdev_adjacent *iter;
+
+ struct net *net = dev_net(dev);
+
+ list_for_each_entry(iter, &dev->adj_list.upper, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_del(iter->dev, dev->name,
+ &iter->dev->adj_list.lower);
+ netdev_adjacent_sysfs_del(dev, iter->dev->name,
+ &dev->adj_list.upper);
+ }
+
+ list_for_each_entry(iter, &dev->adj_list.lower, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_del(iter->dev, dev->name,
+ &iter->dev->adj_list.upper);
+ netdev_adjacent_sysfs_del(dev, iter->dev->name,
+ &dev->adj_list.lower);
+ }
+}
+
void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
{
struct netdev_adjacent *iter;
+ struct net *net = dev_net(dev);
+
list_for_each_entry(iter, &dev->adj_list.upper, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.lower);
netdev_adjacent_sysfs_add(iter->dev, dev,
@@ -5157,6 +5300,8 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.upper);
netdev_adjacent_sysfs_add(iter->dev, dev,
@@ -5440,13 +5585,9 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
*/
ret = 0;
- if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
+ if ((old_flags ^ flags) & IFF_UP)
ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
- if (!ret)
- dev_set_rx_mode(dev);
- }
-
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
unsigned int old_flags = dev->flags;
@@ -6446,17 +6587,19 @@ void netdev_freemem(struct net_device *dev)
/**
* alloc_netdev_mqs - allocate network device
- * @sizeof_priv: size of private data to allocate space for
- * @name: device name format string
- * @setup: callback to initialize device
- * @txqs: the number of TX subqueues to allocate
- * @rxqs: the number of RX subqueues to allocate
+ * @sizeof_priv: size of private data to allocate space for
+ * @name: device name format string
+ * @name_assign_type: origin of device name
+ * @setup: callback to initialize device
+ * @txqs: the number of TX subqueues to allocate
+ * @rxqs: the number of RX subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subqueue structs
* for each queue on the device.
*/
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+ unsigned char name_assign_type,
void (*setup)(struct net_device *),
unsigned int txqs, unsigned int rxqs)
{
@@ -6510,6 +6653,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
+ dev->gso_min_segs = 0;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
@@ -6519,7 +6663,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->adj_list.lower);
INIT_LIST_HEAD(&dev->all_adj_list.upper);
INIT_LIST_HEAD(&dev->all_adj_list.lower);
- dev->priv_flags = IFF_XMIT_DST_RELEASE;
+ dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
dev->num_tx_queues = txqs;
@@ -6535,6 +6679,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
#endif
strcpy(dev->name, name);
+ dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
@@ -6760,6 +6905,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* Send a netdev-removed uevent to the old namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
+ netdev_adjacent_del_links(dev);
/* Actually switch the network namespace */
dev_net_set(dev, net);
@@ -6774,6 +6920,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* Send a netdev-add uevent to the new namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+ netdev_adjacent_add_links(dev);
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
@@ -6938,51 +7085,45 @@ const char *netdev_drivername(const struct net_device *dev)
return empty;
}
-static int __netdev_printk(const char *level, const struct net_device *dev,
- struct va_format *vaf)
+static void __netdev_printk(const char *level, const struct net_device *dev,
+ struct va_format *vaf)
{
- int r;
-
if (dev && dev->dev.parent) {
- r = dev_printk_emit(level[1] - '0',
- dev->dev.parent,
- "%s %s %s: %pV",
- dev_driver_string(dev->dev.parent),
- dev_name(dev->dev.parent),
- netdev_name(dev), vaf);
+ dev_printk_emit(level[1] - '0',
+ dev->dev.parent,
+ "%s %s %s%s: %pV",
+ dev_driver_string(dev->dev.parent),
+ dev_name(dev->dev.parent),
+ netdev_name(dev), netdev_reg_state(dev),
+ vaf);
} else if (dev) {
- r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
+ printk("%s%s%s: %pV",
+ level, netdev_name(dev), netdev_reg_state(dev), vaf);
} else {
- r = printk("%s(NULL net_device): %pV", level, vaf);
+ printk("%s(NULL net_device): %pV", level, vaf);
}
-
- return r;
}
-int netdev_printk(const char *level, const struct net_device *dev,
- const char *format, ...)
+void netdev_printk(const char *level, const struct net_device *dev,
+ const char *format, ...)
{
struct va_format vaf;
va_list args;
- int r;
va_start(args, format);
vaf.fmt = format;
vaf.va = &args;
- r = __netdev_printk(level, dev, &vaf);
+ __netdev_printk(level, dev, &vaf);
va_end(args);
-
- return r;
}
EXPORT_SYMBOL(netdev_printk);
#define define_netdev_printk_level(func, level) \
-int func(const struct net_device *dev, const char *fmt, ...) \
+void func(const struct net_device *dev, const char *fmt, ...) \
{ \
- int r; \
struct va_format vaf; \
va_list args; \
\
@@ -6991,11 +7132,9 @@ int func(const struct net_device *dev, const char *fmt, ...) \
vaf.fmt = fmt; \
vaf.va = &args; \
\
- r = __netdev_printk(level, dev, &vaf); \
+ __netdev_printk(level, dev, &vaf); \
\
va_end(args); \
- \
- return r; \
} \
EXPORT_SYMBOL(func);
@@ -7103,7 +7242,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
- if (dev->rtnl_link_ops)
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
else
unregister_netdevice_queue(dev, &dev_kill_list);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index cf999e09bcd2..72e899a3efda 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -365,11 +365,8 @@ void dev_load(struct net *net, const char *name)
no_module = !dev;
if (no_module && capable(CAP_NET_ADMIN))
no_module = request_module("netdev-%s", name);
- if (no_module && capable(CAP_SYS_MODULE)) {
- if (!request_module("%s", name))
- pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
- name);
- }
+ if (no_module && capable(CAP_SYS_MODULE))
+ request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index e70301eb7a4a..50f9a9db5792 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -289,10 +289,8 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
switch (info->genlhdr->cmd) {
case NET_DM_CMD_START:
return set_all_monitor_traces(TRACE_ON);
- break;
case NET_DM_CMD_STOP:
return set_all_monitor_traces(TRACE_OFF);
- break;
}
return -ENOTSUPP;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 17cb912793fa..1600aa24d36b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1621,6 +1621,81 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
modinfo.eeprom_len);
}
+static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
+{
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ case ETHTOOL_TX_COPYBREAK:
+ if (tuna->len != sizeof(u32) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U32)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ void *data;
+
+ if (!ops->get_tunable)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ ret = ops->get_tunable(dev, &tuna, data);
+ if (ret)
+ goto out;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, data, tuna.len))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ void *data;
+
+ if (!ops->set_tunable)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_from_user(data, useraddr, tuna.len))
+ goto out;
+ ret = ops->set_tunable(dev, &tuna, data);
+
+out:
+ kfree(data);
+ return ret;
+}
+
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -1670,6 +1745,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GCHANNELS:
case ETHTOOL_GET_TS_INFO:
case ETHTOOL_GEEE:
+ case ETHTOOL_GTUNABLE:
break;
default:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -1857,6 +1933,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GMODULEEEPROM:
rc = ethtool_get_module_eeprom(dev, useraddr);
break;
+ case ETHTOOL_GTUNABLE:
+ rc = ethtool_get_tunable(dev, useraddr);
+ break;
+ case ETHTOOL_STUNABLE:
+ rc = ethtool_set_tunable(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 1dbf6462f766..647b12265e18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -18,7 +18,7 @@
* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
- * Kris Katterjohn - Added many additional checks in sk_chk_filter()
+ * Kris Katterjohn - Added many additional checks in bpf_check_classic()
*/
#include <linux/module.h>
@@ -45,63 +45,15 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
-/* Registers */
-#define BPF_R0 regs[BPF_REG_0]
-#define BPF_R1 regs[BPF_REG_1]
-#define BPF_R2 regs[BPF_REG_2]
-#define BPF_R3 regs[BPF_REG_3]
-#define BPF_R4 regs[BPF_REG_4]
-#define BPF_R5 regs[BPF_REG_5]
-#define BPF_R6 regs[BPF_REG_6]
-#define BPF_R7 regs[BPF_REG_7]
-#define BPF_R8 regs[BPF_REG_8]
-#define BPF_R9 regs[BPF_REG_9]
-#define BPF_R10 regs[BPF_REG_10]
-
-/* Named registers */
-#define DST regs[insn->dst_reg]
-#define SRC regs[insn->src_reg]
-#define FP regs[BPF_REG_FP]
-#define ARG1 regs[BPF_REG_ARG1]
-#define CTX regs[BPF_REG_CTX]
-#define IMM insn->imm
-
-/* No hurry in this branch
- *
- * Exported for the bpf jit load helper.
- */
-void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
-{
- u8 *ptr = NULL;
-
- if (k >= SKF_NET_OFF)
- ptr = skb_network_header(skb) + k - SKF_NET_OFF;
- else if (k >= SKF_LL_OFF)
- ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
- if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
- return ptr;
-
- return NULL;
-}
-
-static inline void *load_pointer(const struct sk_buff *skb, int k,
- unsigned int size, void *buffer)
-{
- if (k >= 0)
- return skb_header_pointer(skb, k, size, buffer);
-
- return bpf_internal_load_pointer_neg_helper(skb, k, size);
-}
-
/**
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
*
* Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -135,478 +87,9 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(sk_filter);
-/* Base function for offset calculation. Needs to go into .text section,
- * therefore keeping it non-static as well; will also be used by JITs
- * anyway later on, so do not let the compiler omit it.
- */
-noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
-{
- return 0;
-}
-
-/**
- * __sk_run_filter - run a filter on a given context
- * @ctx: buffer to run the filter on
- * @insn: filter to apply
- *
- * Decode and apply filter instructions to the skb->data. Return length to
- * keep, 0 for none. @ctx is the data we are operating on, @insn is the
- * array of filter instructions.
- */
-static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
-{
- u64 stack[MAX_BPF_STACK / sizeof(u64)];
- u64 regs[MAX_BPF_REG], tmp;
- static const void *jumptable[256] = {
- [0 ... 255] = &&default_label,
- /* Now overwrite non-defaults ... */
- /* 32 bit ALU operations */
- [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
- [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
- [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
- [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
- [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
- [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
- [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
- [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
- [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
- [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
- [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
- [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
- [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
- [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
- [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
- [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
- [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
- [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
- [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
- [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
- [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
- [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
- [BPF_ALU | BPF_NEG] = &&ALU_NEG,
- [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
- [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
- /* 64 bit ALU operations */
- [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
- [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
- [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
- [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
- [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
- [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
- [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
- [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
- [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
- [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
- [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
- [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
- [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
- [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
- [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
- [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
- [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
- [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
- [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
- [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
- [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
- [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
- [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
- [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
- [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
- /* Call instruction */
- [BPF_JMP | BPF_CALL] = &&JMP_CALL,
- /* Jumps */
- [BPF_JMP | BPF_JA] = &&JMP_JA,
- [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
- [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
- [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
- [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
- [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
- [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
- [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
- [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
- [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
- [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
- [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
- [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
- [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
- [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
- /* Program return */
- [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
- /* Store instructions */
- [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
- [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
- [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
- [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
- [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
- [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
- [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
- [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
- [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
- [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
- /* Load instructions */
- [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
- [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
- [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
- [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
- [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
- [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
- [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
- [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
- [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
- [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
- };
- void *ptr;
- int off;
-
-#define CONT ({ insn++; goto select_insn; })
-#define CONT_JMP ({ insn++; goto select_insn; })
-
- FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
- ARG1 = (u64) (unsigned long) ctx;
-
- /* Registers used in classic BPF programs need to be reset first. */
- regs[BPF_REG_A] = 0;
- regs[BPF_REG_X] = 0;
-
-select_insn:
- goto *jumptable[insn->code];
-
- /* ALU */
-#define ALU(OPCODE, OP) \
- ALU64_##OPCODE##_X: \
- DST = DST OP SRC; \
- CONT; \
- ALU_##OPCODE##_X: \
- DST = (u32) DST OP (u32) SRC; \
- CONT; \
- ALU64_##OPCODE##_K: \
- DST = DST OP IMM; \
- CONT; \
- ALU_##OPCODE##_K: \
- DST = (u32) DST OP (u32) IMM; \
- CONT;
-
- ALU(ADD, +)
- ALU(SUB, -)
- ALU(AND, &)
- ALU(OR, |)
- ALU(LSH, <<)
- ALU(RSH, >>)
- ALU(XOR, ^)
- ALU(MUL, *)
-#undef ALU
- ALU_NEG:
- DST = (u32) -DST;
- CONT;
- ALU64_NEG:
- DST = -DST;
- CONT;
- ALU_MOV_X:
- DST = (u32) SRC;
- CONT;
- ALU_MOV_K:
- DST = (u32) IMM;
- CONT;
- ALU64_MOV_X:
- DST = SRC;
- CONT;
- ALU64_MOV_K:
- DST = IMM;
- CONT;
- ALU64_ARSH_X:
- (*(s64 *) &DST) >>= SRC;
- CONT;
- ALU64_ARSH_K:
- (*(s64 *) &DST) >>= IMM;
- CONT;
- ALU64_MOD_X:
- if (unlikely(SRC == 0))
- return 0;
- tmp = DST;
- DST = do_div(tmp, SRC);
- CONT;
- ALU_MOD_X:
- if (unlikely(SRC == 0))
- return 0;
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) SRC);
- CONT;
- ALU64_MOD_K:
- tmp = DST;
- DST = do_div(tmp, IMM);
- CONT;
- ALU_MOD_K:
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) IMM);
- CONT;
- ALU64_DIV_X:
- if (unlikely(SRC == 0))
- return 0;
- do_div(DST, SRC);
- CONT;
- ALU_DIV_X:
- if (unlikely(SRC == 0))
- return 0;
- tmp = (u32) DST;
- do_div(tmp, (u32) SRC);
- DST = (u32) tmp;
- CONT;
- ALU64_DIV_K:
- do_div(DST, IMM);
- CONT;
- ALU_DIV_K:
- tmp = (u32) DST;
- do_div(tmp, (u32) IMM);
- DST = (u32) tmp;
- CONT;
- ALU_END_TO_BE:
- switch (IMM) {
- case 16:
- DST = (__force u16) cpu_to_be16(DST);
- break;
- case 32:
- DST = (__force u32) cpu_to_be32(DST);
- break;
- case 64:
- DST = (__force u64) cpu_to_be64(DST);
- break;
- }
- CONT;
- ALU_END_TO_LE:
- switch (IMM) {
- case 16:
- DST = (__force u16) cpu_to_le16(DST);
- break;
- case 32:
- DST = (__force u32) cpu_to_le32(DST);
- break;
- case 64:
- DST = (__force u64) cpu_to_le64(DST);
- break;
- }
- CONT;
-
- /* CALL */
- JMP_CALL:
- /* Function call scratches BPF_R1-BPF_R5 registers,
- * preserves BPF_R6-BPF_R9, and stores return value
- * into BPF_R0.
- */
- BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
- BPF_R4, BPF_R5);
- CONT;
-
- /* JMP */
- JMP_JA:
- insn += insn->off;
- CONT;
- JMP_JEQ_X:
- if (DST == SRC) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JEQ_K:
- if (DST == IMM) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JNE_X:
- if (DST != SRC) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JNE_K:
- if (DST != IMM) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JGT_X:
- if (DST > SRC) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JGT_K:
- if (DST > IMM) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JGE_X:
- if (DST >= SRC) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JGE_K:
- if (DST >= IMM) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JSGT_X:
- if (((s64) DST) > ((s64) SRC)) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JSGT_K:
- if (((s64) DST) > ((s64) IMM)) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JSGE_X:
- if (((s64) DST) >= ((s64) SRC)) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JSGE_K:
- if (((s64) DST) >= ((s64) IMM)) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JSET_X:
- if (DST & SRC) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_JSET_K:
- if (DST & IMM) {
- insn += insn->off;
- CONT_JMP;
- }
- CONT;
- JMP_EXIT:
- return BPF_R0;
-
- /* STX and ST and LDX*/
-#define LDST(SIZEOP, SIZE) \
- STX_MEM_##SIZEOP: \
- *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
- CONT; \
- ST_MEM_##SIZEOP: \
- *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
- CONT; \
- LDX_MEM_##SIZEOP: \
- DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
- CONT;
-
- LDST(B, u8)
- LDST(H, u16)
- LDST(W, u32)
- LDST(DW, u64)
-#undef LDST
- STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
- atomic_add((u32) SRC, (atomic_t *)(unsigned long)
- (DST + insn->off));
- CONT;
- STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
- atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
- (DST + insn->off));
- CONT;
- LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
- off = IMM;
-load_word:
- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
- * only appearing in the programs where ctx ==
- * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
- * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
- * internal BPF verifier will check that BPF_R6 ==
- * ctx.
- *
- * BPF_ABS and BPF_IND are wrappers of function calls,
- * so they scratch BPF_R1-BPF_R5 registers, preserve
- * BPF_R6-BPF_R9, and store return value into BPF_R0.
- *
- * Implicit input:
- * ctx == skb == BPF_R6 == CTX
- *
- * Explicit input:
- * SRC == any register
- * IMM == 32-bit immediate
- *
- * Output:
- * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
- */
-
- ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
- if (likely(ptr != NULL)) {
- BPF_R0 = get_unaligned_be32(ptr);
- CONT;
- }
-
- return 0;
- LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
- off = IMM;
-load_half:
- ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
- if (likely(ptr != NULL)) {
- BPF_R0 = get_unaligned_be16(ptr);
- CONT;
- }
-
- return 0;
- LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
- off = IMM;
-load_byte:
- ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
- if (likely(ptr != NULL)) {
- BPF_R0 = *(u8 *)ptr;
- CONT;
- }
-
- return 0;
- LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
- off = IMM + SRC;
- goto load_word;
- LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
- off = IMM + SRC;
- goto load_half;
- LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
- off = IMM + SRC;
- goto load_byte;
-
- default_label:
- /* If we ever reach this, we have a bug somewhere. */
- WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
- return 0;
-}
-
-/* Helper to find the offset of pkt_type in sk_buff structure. We want
- * to make sure its still a 3bit field starting at a byte boundary;
- * taken from arch/x86/net/bpf_jit_comp.c.
- */
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX (7 << 5)
-#else
-#define PKT_TYPE_MAX 7
-#endif
-static unsigned int pkt_type_offset(void)
-{
- struct sk_buff skb_probe = { .pkt_type = ~0, };
- u8 *ct = (u8 *) &skb_probe;
- unsigned int off;
-
- for (off = 0; off < sizeof(struct sk_buff); off++) {
- if (ct[off] == PKT_TYPE_MAX)
- return off;
- }
-
- pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
- return -1;
-}
-
static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+ return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}
static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
@@ -667,9 +150,9 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
}
static bool convert_bpf_extensions(struct sock_filter *fp,
- struct sock_filter_int **insnp)
+ struct bpf_insn **insnp)
{
- struct sock_filter_int *insn = *insnp;
+ struct bpf_insn *insn = *insnp;
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PROTOCOL:
@@ -683,11 +166,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_PKTTYPE:
- *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
- pkt_type_offset());
- if (insn->off < 0)
- return false;
- insn++;
+ *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+ PKT_TYPE_OFFSET());
*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
insn++;
@@ -805,7 +285,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
}
/**
- * sk_convert_filter - convert filter program
+ * bpf_convert_filter - convert filter program
* @prog: the user passed filter program
* @len: the length of the user passed filter program
* @new_prog: buffer where converted program will be stored
@@ -815,12 +295,12 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* Conversion workflow:
*
* 1) First pass for calculating the new program length:
- * sk_convert_filter(old_prog, old_len, NULL, &new_len)
+ * bpf_convert_filter(old_prog, old_len, NULL, &new_len)
*
* 2) 2nd pass to remap in two passes: 1st pass finds new
* jump offsets, 2nd pass remapping:
- * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len);
- * sk_convert_filter(old_prog, old_len, new_prog, &new_len);
+ * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
+ * bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
*
* User BPF's register A is mapped to our BPF register 6, user BPF
* register X is mapped to BPF register 7; frame pointer is always
@@ -828,11 +308,11 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* for socket filters: ctx == 'struct sk_buff *', for seccomp:
* ctx == 'struct seccomp_data *'.
*/
-int sk_convert_filter(struct sock_filter *prog, int len,
- struct sock_filter_int *new_prog, int *new_len)
+int bpf_convert_filter(struct sock_filter *prog, int len,
+ struct bpf_insn *new_prog, int *new_len)
{
int new_flen = 0, pass = 0, target, i;
- struct sock_filter_int *new_insn;
+ struct bpf_insn *new_insn;
struct sock_filter *fp;
int *addrs = NULL;
u8 bpf_src;
@@ -858,8 +338,8 @@ do_pass:
new_insn++;
for (i = 0; i < len; fp++, i++) {
- struct sock_filter_int tmp_insns[6] = { };
- struct sock_filter_int *insn = tmp_insns;
+ struct bpf_insn tmp_insns[6] = { };
+ struct bpf_insn *insn = tmp_insns;
if (addrs)
addrs[i] = new_insn - new_prog;
@@ -1086,15 +566,12 @@ err:
/* Security:
*
- * A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
- *
* As we dont want to clear mem[] array for each packet going through
- * sk_run_filter(), we check that filter loaded by user never try to read
+ * __bpf_prog_run(), we check that filter loaded by user never try to read
* a cell if not previously written, and we check all branches to be sure
* a malicious user doesn't try to abuse us.
*/
-static int check_load_and_stores(struct sock_filter *filter, int flen)
+static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
int pc, ret = 0;
@@ -1214,7 +691,7 @@ static bool chk_code_allowed(u16 code_to_probe)
}
/**
- * sk_chk_filter - verify socket filter code
+ * bpf_check_classic - verify socket filter code
* @filter: filter to verify
* @flen: length of filter
*
@@ -1227,7 +704,7 @@ static bool chk_code_allowed(u16 code_to_probe)
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
-int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
+int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
{
bool anc_found;
int pc;
@@ -1237,7 +714,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
/* Check the filter code now */
for (pc = 0; pc < flen; pc++) {
- struct sock_filter *ftest = &filter[pc];
+ const struct sock_filter *ftest = &filter[pc];
/* May we actually operate on this code? */
if (!chk_code_allowed(ftest->code))
@@ -1301,12 +778,12 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
return -EINVAL;
}
-EXPORT_SYMBOL(sk_chk_filter);
+EXPORT_SYMBOL(bpf_check_classic);
-static int sk_store_orig_filter(struct sk_filter *fp,
- const struct sock_fprog *fprog)
+static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
+ const struct sock_fprog *fprog)
{
- unsigned int fsize = sk_filter_proglen(fprog);
+ unsigned int fsize = bpf_classic_proglen(fprog);
struct sock_fprog_kern *fkprog;
fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
@@ -1324,7 +801,7 @@ static int sk_store_orig_filter(struct sk_filter *fp,
return 0;
}
-static void sk_release_orig_filter(struct sk_filter *fp)
+static void bpf_release_orig_filter(struct bpf_prog *fp)
{
struct sock_fprog_kern *fprog = fp->orig_prog;
@@ -1334,6 +811,18 @@ static void sk_release_orig_filter(struct sk_filter *fp)
}
}
+static void __bpf_prog_release(struct bpf_prog *prog)
+{
+ bpf_release_orig_filter(prog);
+ bpf_prog_free(prog);
+}
+
+static void __sk_filter_release(struct sk_filter *fp)
+{
+ __bpf_prog_release(fp->prog);
+ kfree(fp);
+}
+
/**
* sk_filter_release_rcu - Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
@@ -1342,8 +831,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
{
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
- sk_release_orig_filter(fp);
- sk_filter_free(fp);
+ __sk_filter_release(fp);
}
/**
@@ -1360,44 +848,33 @@ static void sk_filter_release(struct sk_filter *fp)
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
- atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
- sk_filter_release(fp);
-}
+ u32 filter_size = bpf_prog_size(fp->prog->len);
-void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
-{
- atomic_inc(&fp->refcnt);
- atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
+ atomic_sub(filter_size, &sk->sk_omem_alloc);
+ sk_filter_release(fp);
}
-static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
- struct sock *sk,
- unsigned int len)
+/* try to charge the socket memory if there is space available
+ * return true on success
+ */
+bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
- struct sk_filter *fp_new;
-
- if (sk == NULL)
- return krealloc(fp, len, GFP_KERNEL);
-
- fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
- if (fp_new) {
- *fp_new = *fp;
- /* As we're keeping orig_prog in fp_new along,
- * we need to make sure we're not evicting it
- * from the old fp.
- */
- fp->orig_prog = NULL;
- sk_filter_uncharge(sk, fp);
+ u32 filter_size = bpf_prog_size(fp->prog->len);
+
+ /* same check as in sock_kmalloc() */
+ if (filter_size <= sysctl_optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+ atomic_inc(&fp->refcnt);
+ atomic_add(filter_size, &sk->sk_omem_alloc);
+ return true;
}
-
- return fp_new;
+ return false;
}
-static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
- struct sock *sk)
+static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
struct sock_filter *old_prog;
- struct sk_filter *old_fp;
+ struct bpf_prog *old_fp;
int err, new_len, old_len = fp->len;
/* We are free to overwrite insns et al right here as it
@@ -1406,7 +883,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
* representation.
*/
BUILD_BUG_ON(sizeof(struct sock_filter) !=
- sizeof(struct sock_filter_int));
+ sizeof(struct bpf_insn));
/* Conversion cannot happen on overlapping memory areas,
* so we need to keep the user BPF around until the 2nd
@@ -1420,13 +897,13 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
}
/* 1st pass: calculate the new program length. */
- err = sk_convert_filter(old_prog, old_len, NULL, &new_len);
+ err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
if (err)
goto out_err_free;
/* Expand fp for appending the new filter representation. */
old_fp = fp;
- fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
+ fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
if (!fp) {
/* The old_fp is still around in case we couldn't
* allocate new memory, so uncharge on that one.
@@ -1438,17 +915,17 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
fp->len = new_len;
- /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
- err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
+ /* 2nd pass: remap sock_filter insns into bpf_insn insns. */
+ err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
if (err)
- /* 2nd sk_convert_filter() can fail only if it fails
+ /* 2nd bpf_convert_filter() can fail only if it fails
* to allocate memory, remapping must succeed. Note,
* that at this time old_fp has already been released
- * by __sk_migrate_realloc().
+ * by krealloc().
*/
goto out_err_free;
- sk_filter_select_runtime(fp);
+ bpf_prog_select_runtime(fp);
kfree(old_prog);
return fp;
@@ -1456,55 +933,20 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
out_err_free:
kfree(old_prog);
out_err:
- /* Rollback filter setup. */
- if (sk != NULL)
- sk_filter_uncharge(sk, fp);
- else
- kfree(fp);
+ __bpf_prog_release(fp);
return ERR_PTR(err);
}
-void __weak bpf_int_jit_compile(struct sk_filter *prog)
-{
-}
-
-/**
- * sk_filter_select_runtime - select execution runtime for BPF program
- * @fp: sk_filter populated with internal BPF program
- *
- * try to JIT internal BPF program, if JIT is not available select interpreter
- * BPF program will be executed via SK_RUN_FILTER() macro
- */
-void sk_filter_select_runtime(struct sk_filter *fp)
-{
- fp->bpf_func = (void *) __sk_run_filter;
-
- /* Probe if internal BPF can be JITed */
- bpf_int_jit_compile(fp);
-}
-EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
-
-/* free internal BPF program */
-void sk_filter_free(struct sk_filter *fp)
-{
- bpf_jit_free(fp);
-}
-EXPORT_SYMBOL_GPL(sk_filter_free);
-
-static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
- struct sock *sk)
+static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
{
int err;
fp->bpf_func = NULL;
- fp->jited = 0;
+ fp->jited = false;
- err = sk_chk_filter(fp->insns, fp->len);
+ err = bpf_check_classic(fp->insns, fp->len);
if (err) {
- if (sk != NULL)
- sk_filter_uncharge(sk, fp);
- else
- kfree(fp);
+ __bpf_prog_release(fp);
return ERR_PTR(err);
}
@@ -1517,13 +959,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
* internal BPF translation for the optimized interpreter.
*/
if (!fp->jited)
- fp = __sk_migrate_filter(fp, sk);
+ fp = bpf_migrate_filter(fp);
return fp;
}
/**
- * sk_unattached_filter_create - create an unattached filter
+ * bpf_prog_create - create an unattached filter
* @pfp: the unattached filter that is created
* @fprog: the filter program
*
@@ -1532,23 +974,21 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
* If an error occurs or there is insufficient memory for the filter
* a negative errno code is returned. On success the return is zero.
*/
-int sk_unattached_filter_create(struct sk_filter **pfp,
- struct sock_fprog_kern *fprog)
+int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
- unsigned int fsize = sk_filter_proglen(fprog);
- struct sk_filter *fp;
+ unsigned int fsize = bpf_classic_proglen(fprog);
+ struct bpf_prog *fp;
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
return -EINVAL;
- fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
+ fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
if (!fp)
return -ENOMEM;
memcpy(fp->insns, fprog->filter, fsize);
- atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
/* Since unattached filters are not copied back to user
* space through sk_get_filter(), we do not need to hold
@@ -1556,23 +996,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
*/
fp->orig_prog = NULL;
- /* __sk_prepare_filter() already takes care of uncharging
+ /* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- fp = __sk_prepare_filter(fp, NULL);
+ fp = bpf_prepare_filter(fp);
if (IS_ERR(fp))
return PTR_ERR(fp);
*pfp = fp;
return 0;
}
-EXPORT_SYMBOL_GPL(sk_unattached_filter_create);
+EXPORT_SYMBOL_GPL(bpf_prog_create);
-void sk_unattached_filter_destroy(struct sk_filter *fp)
+void bpf_prog_destroy(struct bpf_prog *fp)
{
- sk_filter_release(fp);
+ __bpf_prog_release(fp);
}
-EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
+EXPORT_SYMBOL_GPL(bpf_prog_destroy);
/**
* sk_attach_filter - attach a socket filter
@@ -1587,8 +1027,9 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
- unsigned int fsize = sk_filter_proglen(fprog);
- unsigned int sk_fsize = sk_filter_size(fprog->len);
+ unsigned int fsize = bpf_classic_proglen(fprog);
+ unsigned int bpf_fsize = bpf_prog_size(fprog->len);
+ struct bpf_prog *prog;
int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -1598,30 +1039,43 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (fprog->filter == NULL)
return -EINVAL;
- fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
- if (!fp)
+ prog = bpf_prog_alloc(bpf_fsize, 0);
+ if (!prog)
return -ENOMEM;
- if (copy_from_user(fp->insns, fprog->filter, fsize)) {
- sock_kfree_s(sk, fp, sk_fsize);
+ if (copy_from_user(prog->insns, fprog->filter, fsize)) {
+ __bpf_prog_free(prog);
return -EFAULT;
}
- atomic_set(&fp->refcnt, 1);
- fp->len = fprog->len;
+ prog->len = fprog->len;
- err = sk_store_orig_filter(fp, fprog);
+ err = bpf_prog_store_orig_filter(prog, fprog);
if (err) {
- sk_filter_uncharge(sk, fp);
+ __bpf_prog_free(prog);
return -ENOMEM;
}
- /* __sk_prepare_filter() already takes care of uncharging
+ /* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- fp = __sk_prepare_filter(fp, sk);
- if (IS_ERR(fp))
- return PTR_ERR(fp);
+ prog = bpf_prepare_filter(prog);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+ if (!fp) {
+ __bpf_prog_release(prog);
+ return -ENOMEM;
+ }
+ fp->prog = prog;
+
+ atomic_set(&fp->refcnt, 0);
+
+ if (!sk_filter_charge(sk, fp)) {
+ __sk_filter_release(fp);
+ return -ENOMEM;
+ }
old_fp = rcu_dereference_protected(sk->sk_filter,
sock_owned_by_user(sk));
@@ -1670,7 +1124,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
/* We're copying the filter that has been originally attached,
* so no conversion/decode needed anymore.
*/
- fprog = filter->orig_prog;
+ fprog = filter->prog->orig_prog;
ret = fprog->len;
if (!len)
@@ -1682,7 +1136,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
goto out;
ret = -EFAULT;
- if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog)))
+ if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
goto out;
/* Instead of bytes, the API requests to return the number
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 107ed12a5323..45084938c403 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -13,6 +13,7 @@
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/flow_keys.h>
+#include <scsi/fc/fc_fcoe.h>
/* copy saddr & daddr, possibly using 64bit load/store
* Equivalent to : flow->src = iph->saddr;
@@ -26,36 +27,61 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i
}
/**
- * skb_flow_get_ports - extract the upper layer ports and return them
- * @skb: buffer to extract the ports from
+ * __skb_flow_get_ports - extract the upper layer ports and return them
+ * @skb: sk_buff to extract the ports from
* @thoff: transport header offset
* @ip_proto: protocol for which to get port offset
+ * @data: raw buffer pointer to the packet, if NULL use skb->data
+ * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
*
* The function will try to retrieve the ports at offset thoff + poff where poff
* is the protocol port offset returned from proto_ports_offset
*/
-__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
+__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
+ void *data, int hlen)
{
int poff = proto_ports_offset(ip_proto);
+ if (!data) {
+ data = skb->data;
+ hlen = skb_headlen(skb);
+ }
+
if (poff >= 0) {
__be32 *ports, _ports;
- ports = skb_header_pointer(skb, thoff + poff,
- sizeof(_ports), &_ports);
+ ports = __skb_header_pointer(skb, thoff + poff,
+ sizeof(_ports), data, hlen, &_ports);
if (ports)
return *ports;
}
return 0;
}
-EXPORT_SYMBOL(skb_flow_get_ports);
+EXPORT_SYMBOL(__skb_flow_get_ports);
-bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
+/**
+ * __skb_flow_dissect - extract the flow_keys struct and return it
+ * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
+ * @data: raw buffer pointer to the packet, if NULL use skb->data
+ * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
+ * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
+ * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
+ *
+ * The function will try to retrieve the struct flow_keys from either the skbuff
+ * or a raw buffer specified by the rest parameters
+ */
+bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+ void *data, __be16 proto, int nhoff, int hlen)
{
- int nhoff = skb_network_offset(skb);
u8 ip_proto;
- __be16 proto = skb->protocol;
+
+ if (!data) {
+ data = skb->data;
+ proto = skb->protocol;
+ nhoff = skb_network_offset(skb);
+ hlen = skb_headlen(skb);
+ }
memset(flow, 0, sizeof(*flow));
@@ -65,7 +91,7 @@ again:
const struct iphdr *iph;
struct iphdr _iph;
ip:
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
if (!iph || iph->ihl < 5)
return false;
nhoff += iph->ihl * 4;
@@ -74,21 +100,50 @@ ip:
if (ip_is_fragment(iph))
ip_proto = 0;
+ /* skip the address processing if skb is NULL. The assumption
+ * here is that if there is no skb we are not looking for flow
+ * info but lengths and protocols.
+ */
+ if (!skb)
+ break;
+
iph_to_flow_copy_addrs(flow, iph);
break;
}
case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
+ __be32 flow_label;
+
ipv6:
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
if (!iph)
return false;
ip_proto = iph->nexthdr;
+ nhoff += sizeof(struct ipv6hdr);
+
+ /* see comment above in IPv4 section */
+ if (!skb)
+ break;
+
flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
- nhoff += sizeof(struct ipv6hdr);
+
+ flow_label = ip6_flowlabel(iph);
+ if (flow_label) {
+ /* Awesome, IPv6 packet has a flow label so we can
+ * use that to represent the ports without any
+ * further dissection.
+ */
+ flow->n_proto = proto;
+ flow->ip_proto = ip_proto;
+ flow->ports = flow_label;
+ flow->thoff = (u16)nhoff;
+
+ return true;
+ }
+
break;
}
case htons(ETH_P_8021AD):
@@ -96,7 +151,7 @@ ipv6:
const struct vlan_hdr *vlan;
struct vlan_hdr _vlan;
- vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
+ vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
if (!vlan)
return false;
@@ -109,7 +164,7 @@ ipv6:
struct pppoe_hdr hdr;
__be16 proto;
} *hdr, _hdr;
- hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
return false;
proto = hdr->proto;
@@ -123,6 +178,9 @@ ipv6:
return false;
}
}
+ case htons(ETH_P_FCOE):
+ flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+ /* fall through */
default:
return false;
}
@@ -134,7 +192,7 @@ ipv6:
__be16 proto;
} *hdr, _hdr;
- hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
return false;
/*
@@ -154,8 +212,9 @@ ipv6:
const struct ethhdr *eth;
struct ethhdr _eth;
- eth = skb_header_pointer(skb, nhoff,
- sizeof(_eth), &_eth);
+ eth = __skb_header_pointer(skb, nhoff,
+ sizeof(_eth),
+ data, hlen, &_eth);
if (!eth)
return false;
proto = eth->h_proto;
@@ -175,13 +234,18 @@ ipv6:
break;
}
+ flow->n_proto = proto;
flow->ip_proto = ip_proto;
- flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
flow->thoff = (u16) nhoff;
+ /* unless skb is set we don't need to record port info */
+ if (skb)
+ flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+ data, hlen);
+
return true;
}
-EXPORT_SYMBOL(skb_flow_dissect);
+EXPORT_SYMBOL(__skb_flow_dissect);
static u32 hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
@@ -195,11 +259,32 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
return jhash_3words(a, b, c, hashrnd);
}
-static __always_inline u32 __flow_hash_1word(u32 a)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
{
- __flow_hash_secret_init();
- return jhash_1word(a, hashrnd);
+ u32 hash;
+
+ /* get a consistent hash (same value on both flow directions) */
+ if (((__force u32)keys->dst < (__force u32)keys->src) ||
+ (((__force u32)keys->dst == (__force u32)keys->src) &&
+ ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
+ swap(keys->dst, keys->src);
+ swap(keys->port16[0], keys->port16[1]);
+ }
+
+ hash = __flow_hash_3words((__force u32)keys->dst,
+ (__force u32)keys->src,
+ (__force u32)keys->ports);
+ if (!hash)
+ hash = 1;
+
+ return hash;
+}
+
+u32 flow_hash_from_keys(struct flow_keys *keys)
+{
+ return __flow_hash_from_keys(keys);
}
+EXPORT_SYMBOL(flow_hash_from_keys);
/*
* __skb_get_hash: calculate a flow hash based on src/dst addresses
@@ -210,7 +295,6 @@ static __always_inline u32 __flow_hash_1word(u32 a)
void __skb_get_hash(struct sk_buff *skb)
{
struct flow_keys keys;
- u32 hash;
if (!skb_flow_dissect(skb, &keys))
return;
@@ -218,21 +302,9 @@ void __skb_get_hash(struct sk_buff *skb)
if (keys.ports)
skb->l4_hash = 1;
- /* get a consistent hash (same value on both flow directions) */
- if (((__force u32)keys.dst < (__force u32)keys.src) ||
- (((__force u32)keys.dst == (__force u32)keys.src) &&
- ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
- swap(keys.dst, keys.src);
- swap(keys.port16[0], keys.port16[1]);
- }
-
- hash = __flow_hash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports);
- if (!hash)
- hash = 1;
+ skb->sw_hash = 1;
- skb->hash = hash;
+ skb->hash = __flow_hash_from_keys(&keys);
}
EXPORT_SYMBOL(__skb_get_hash);
@@ -240,7 +312,7 @@ EXPORT_SYMBOL(__skb_get_hash);
* Returns a Tx hash based on the given packet descriptor a Tx queues' number
* to be used as a distribution range.
*/
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues)
{
u32 hash;
@@ -260,40 +332,27 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
qcount = dev->tc_to_txq[tc].count;
}
- if (skb->sk && skb->sk->sk_hash)
- hash = skb->sk->sk_hash;
- else
- hash = (__force u16) skb->protocol;
- hash = __flow_hash_1word(hash);
-
- return (u16) (((u64) hash * qcount) >> 32) + qoffset;
+ return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
-/* __skb_get_poff() returns the offset to the payload as far as it could
- * be dissected. The main user is currently BPF, so that we can dynamically
- * truncate packets without needing to push actual payload to the user
- * space and can analyze headers only, instead.
- */
-u32 __skb_get_poff(const struct sk_buff *skb)
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+ const struct flow_keys *keys, int hlen)
{
- struct flow_keys keys;
- u32 poff = 0;
+ u32 poff = keys->thoff;
- if (!skb_flow_dissect(skb, &keys))
- return 0;
-
- poff += keys.thoff;
- switch (keys.ip_proto) {
+ switch (keys->ip_proto) {
case IPPROTO_TCP: {
- const struct tcphdr *tcph;
- struct tcphdr _tcph;
+ /* access doff as u8 to avoid unaligned access */
+ const u8 *doff;
+ u8 _doff;
- tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph);
- if (!tcph)
+ doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+ data, hlen, &_doff);
+ if (!doff)
return poff;
- poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+ poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
break;
}
case IPPROTO_UDP:
@@ -323,6 +382,21 @@ u32 __skb_get_poff(const struct sk_buff *skb)
return poff;
}
+/* skb_get_poff() returns the offset to the payload as far as it could
+ * be dissected. The main user is currently BPF, so that we can dynamically
+ * truncate packets without needing to push actual payload to the user
+ * space and can analyze headers only, instead.
+ */
+u32 skb_get_poff(const struct sk_buff *skb)
+{
+ struct flow_keys keys;
+
+ if (!skb_flow_dissect(skb, &keys))
+ return 0;
+
+ return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
+}
+
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
@@ -338,17 +412,9 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
if (map) {
if (map->len == 1)
queue_index = map->queues[0];
- else {
- u32 hash;
- if (skb->sk && skb->sk->sk_hash)
- hash = skb->sk->sk_hash;
- else
- hash = (__force u16) skb->protocol ^
- skb->hash;
- hash = __flow_hash_1word(hash);
- queue_index = map->queues[
- ((u64)hash * map->len) >> 32];
- }
+ else
+ queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+ map->len)];
if (unlikely(queue_index >= dev->real_num_tx_queues))
queue_index = -1;
}
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6b5b6e7013ca..9dfb88a933e7 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -91,6 +91,8 @@ struct gen_estimator
u32 avpps;
struct rcu_head e_rcu;
struct rb_node node;
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ struct rcu_head head;
};
struct gen_estimator_head
@@ -115,9 +117,8 @@ static void est_timer(unsigned long arg)
rcu_read_lock();
list_for_each_entry_rcu(e, &elist[idx].list, list) {
- u64 nbytes;
+ struct gnet_stats_basic_packed b = {0};
u64 brate;
- u32 npackets;
u32 rate;
spin_lock(e->stats_lock);
@@ -125,15 +126,15 @@ static void est_timer(unsigned long arg)
if (e->bstats == NULL)
goto skip;
- nbytes = e->bstats->bytes;
- npackets = e->bstats->packets;
- brate = (nbytes - e->last_bytes)<<(7 - idx);
- e->last_bytes = nbytes;
+ __gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);
+
+ brate = (b.bytes - e->last_bytes)<<(7 - idx);
+ e->last_bytes = b.bytes;
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
e->rate_est->bps = (e->avbps+0xF)>>5;
- rate = (npackets - e->last_packets)<<(12 - idx);
- e->last_packets = npackets;
+ rate = (b.packets - e->last_packets)<<(12 - idx);
+ e->last_packets = b.packets;
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
e->rate_est->pps = (e->avpps+0x1FF)>>10;
skip:
@@ -197,18 +198,20 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
* as destination. A new timer with the interval specified in the
* configuration TLV is created. Upon each interval, the latest statistics
* will be read from &bstats and the estimated rate will be stored in
- * &rate_est with the statistics lock grabed during this period.
+ * &rate_est with the statistics lock grabbed during this period.
*
* Returns 0 on success or a negative error code.
*
*/
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock,
struct nlattr *opt)
{
struct gen_estimator *est;
struct gnet_estimator *parm = nla_data(opt);
+ struct gnet_stats_basic_packed b = {0};
int idx;
if (nla_len(opt) < sizeof(*parm))
@@ -221,15 +224,18 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
if (est == NULL)
return -ENOBUFS;
+ __gnet_stats_copy_basic(&b, cpu_bstats, bstats);
+
idx = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
est->ewma_log = parm->ewma_log;
- est->last_bytes = bstats->bytes;
+ est->last_bytes = b.bytes;
est->avbps = rate_est->bps<<5;
- est->last_packets = bstats->packets;
+ est->last_packets = b.packets;
est->avpps = rate_est->pps<<10;
+ est->cpu_bstats = cpu_bstats;
spin_lock_bh(&est_tree_lock);
if (!elist[idx].timer.function) {
@@ -290,11 +296,12 @@ EXPORT_SYMBOL(gen_kill_estimator);
* Returns 0 on success or a negative error code.
*/
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock, struct nlattr *opt)
{
gen_kill_estimator(bstats, rate_est);
- return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+ return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 9d3d9e78397b..0c08062d1796 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -97,6 +97,43 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
}
EXPORT_SYMBOL(gnet_stats_start_copy);
+static void
+__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
+ unsigned int start;
+ u64 bytes;
+ u32 packets;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+ bytes = bcpu->bstats.bytes;
+ packets = bcpu->bstats.packets;
+ } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+ bstats->bytes += bytes;
+ bstats->packets += packets;
+ }
+}
+
+void
+__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b)
+{
+ if (cpu) {
+ __gnet_stats_copy_basic_cpu(bstats, cpu);
+ } else {
+ bstats->bytes = b->bytes;
+ bstats->packets = b->packets;
+ }
+}
+EXPORT_SYMBOL(__gnet_stats_copy_basic);
+
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
* @d: dumping handle
@@ -109,19 +146,25 @@ EXPORT_SYMBOL(gnet_stats_start_copy);
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b)
{
+ struct gnet_stats_basic_packed bstats = {0};
+
+ __gnet_stats_copy_basic(&bstats, cpu, b);
+
if (d->compat_tc_stats) {
- d->tc_stats.bytes = b->bytes;
- d->tc_stats.packets = b->packets;
+ d->tc_stats.bytes = bstats.bytes;
+ d->tc_stats.packets = bstats.packets;
}
if (d->tail) {
struct gnet_stats_basic sb;
memset(&sb, 0, sizeof(sb));
- sb.bytes = b->bytes;
- sb.packets = b->packets;
+ sb.bytes = bstats.bytes;
+ sb.packets = bstats.packets;
return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
}
return 0;
@@ -172,29 +215,74 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
}
EXPORT_SYMBOL(gnet_stats_copy_rate_est);
+static void
+__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *q)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
+
+ qstats->qlen = 0;
+ qstats->backlog += qcpu->backlog;
+ qstats->drops += qcpu->drops;
+ qstats->requeues += qcpu->requeues;
+ qstats->overlimits += qcpu->overlimits;
+ }
+}
+
+static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu,
+ const struct gnet_stats_queue *q,
+ __u32 qlen)
+{
+ if (cpu) {
+ __gnet_stats_copy_queue_cpu(qstats, cpu);
+ } else {
+ qstats->qlen = q->qlen;
+ qstats->backlog = q->backlog;
+ qstats->drops = q->drops;
+ qstats->requeues = q->requeues;
+ qstats->overlimits = q->overlimits;
+ }
+
+ qstats->qlen = qlen;
+}
+
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
* @d: dumping handle
+ * @cpu_q: per cpu queue statistics
* @q: queue statistics
+ * @qlen: queue length statistics
*
* Appends the queue statistics to the top level TLV created by
- * gnet_stats_start_copy().
+ * gnet_stats_start_copy(). Using per cpu queue statistics if
+ * they are available.
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
+gnet_stats_copy_queue(struct gnet_dump *d,
+ struct gnet_stats_queue __percpu *cpu_q,
+ struct gnet_stats_queue *q, __u32 qlen)
{
+ struct gnet_stats_queue qstats = {0};
+
+ __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen);
+
if (d->compat_tc_stats) {
- d->tc_stats.drops = q->drops;
- d->tc_stats.qlen = q->qlen;
- d->tc_stats.backlog = q->backlog;
- d->tc_stats.overlimits = q->overlimits;
+ d->tc_stats.drops = qstats.drops;
+ d->tc_stats.qlen = qstats.qlen;
+ d->tc_stats.backlog = qstats.backlog;
+ d->tc_stats.overlimits = qstats.overlimits;
}
if (d->tail)
- return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
+ return gnet_stats_copy(d, TCA_STATS_QUEUE,
+ &qstats, sizeof(qstats));
return 0;
}
@@ -206,7 +294,7 @@ EXPORT_SYMBOL(gnet_stats_copy_queue);
* @st: application specific statistics data
* @len: length of data
*
- * Appends the application sepecific statistics to the top level TLV created by
+ * Appends the application specific statistics to the top level TLV created by
* gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
* handle is in backward compatibility mode.
*
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1cac29ebb05b..9dd06699b09c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -43,12 +43,12 @@ static ssize_t netdev_show(const struct device *dev,
struct device_attribute *attr, char *buf,
ssize_t (*format)(const struct net_device *, char *))
{
- struct net_device *net = to_net_dev(dev);
+ struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
- if (dev_isalive(net))
- ret = (*format)(net, buf);
+ if (dev_isalive(ndev))
+ ret = (*format)(ndev, buf);
read_unlock(&dev_base_lock);
return ret;
@@ -56,9 +56,9 @@ static ssize_t netdev_show(const struct device *dev,
/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string) \
-static ssize_t format_##field(const struct net_device *net, char *buf) \
+static ssize_t format_##field(const struct net_device *dev, char *buf) \
{ \
- return sprintf(buf, format_string, net->field); \
+ return sprintf(buf, format_string, dev->field); \
} \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -112,16 +112,35 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);
+static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
+{
+ return sprintf(buf, fmt_dec, dev->name_assign_type);
+}
+
+static ssize_t name_assign_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *ndev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (ndev->name_assign_type != NET_NAME_UNKNOWN)
+ ret = netdev_show(dev, attr, buf, format_name_assign_type);
+
+ return ret;
+}
+static DEVICE_ATTR_RO(name_assign_type);
+
/* use same locking rules as GIFHWADDR ioctl's */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct net_device *net = to_net_dev(dev);
+ struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
- if (dev_isalive(net))
- ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
+ if (dev_isalive(ndev))
+ ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
read_unlock(&dev_base_lock);
return ret;
}
@@ -130,18 +149,18 @@ static DEVICE_ATTR_RO(address);
static ssize_t broadcast_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct net_device *net = to_net_dev(dev);
- if (dev_isalive(net))
- return sysfs_format_mac(buf, net->broadcast, net->addr_len);
+ struct net_device *ndev = to_net_dev(dev);
+ if (dev_isalive(ndev))
+ return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
return -EINVAL;
}
static DEVICE_ATTR_RO(broadcast);
-static int change_carrier(struct net_device *net, unsigned long new_carrier)
+static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
- if (!netif_running(net))
+ if (!netif_running(dev))
return -EINVAL;
- return dev_change_carrier(net, (bool) new_carrier);
+ return dev_change_carrier(dev, (bool) new_carrier);
}
static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
@@ -265,9 +284,9 @@ static DEVICE_ATTR_RO(carrier_changes);
/* read-write attributes */
-static int change_mtu(struct net_device *net, unsigned long new_mtu)
+static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
- return dev_set_mtu(net, (int) new_mtu);
+ return dev_set_mtu(dev, (int) new_mtu);
}
static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
@@ -277,9 +296,9 @@ static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
}
NETDEVICE_SHOW_RW(mtu, fmt_dec);
-static int change_flags(struct net_device *net, unsigned long new_flags)
+static int change_flags(struct net_device *dev, unsigned long new_flags)
{
- return dev_change_flags(net, (unsigned int) new_flags);
+ return dev_change_flags(dev, (unsigned int) new_flags);
}
static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
@@ -289,9 +308,9 @@ static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
}
NETDEVICE_SHOW_RW(flags, fmt_hex);
-static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
+static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
{
- net->tx_queue_len = new_len;
+ dev->tx_queue_len = new_len;
return 0;
}
@@ -344,9 +363,9 @@ static ssize_t ifalias_show(struct device *dev,
}
static DEVICE_ATTR_RW(ifalias);
-static int change_group(struct net_device *net, unsigned long new_group)
+static int change_group(struct net_device *dev, unsigned long new_group)
{
- dev_set_group(net, (int) new_group);
+ dev_set_group(dev, (int) new_group);
return 0;
}
@@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_dev_port.attr,
&dev_attr_iflink.attr,
&dev_attr_ifindex.attr,
+ &dev_attr_name_assign_type.attr,
&dev_attr_addr_assign_type.attr,
&dev_attr_addr_len.attr,
&dev_attr_link_mode.attr,
@@ -776,20 +796,20 @@ static struct kobj_type rx_queue_ktype = {
.namespace = rx_queue_namespace
};
-static int rx_queue_add_kobject(struct net_device *net, int index)
+static int rx_queue_add_kobject(struct net_device *dev, int index)
{
- struct netdev_rx_queue *queue = net->_rx + index;
+ struct netdev_rx_queue *queue = dev->_rx + index;
struct kobject *kobj = &queue->kobj;
int error = 0;
- kobj->kset = net->queues_kset;
+ kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
"rx-%u", index);
if (error)
goto exit;
- if (net->sysfs_rx_queue_group) {
- error = sysfs_create_group(kobj, net->sysfs_rx_queue_group);
+ if (dev->sysfs_rx_queue_group) {
+ error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
goto exit;
}
@@ -805,18 +825,18 @@ exit:
#endif /* CONFIG_SYSFS */
int
-net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
int i;
int error = 0;
#ifndef CONFIG_RPS
- if (!net->sysfs_rx_queue_group)
+ if (!dev->sysfs_rx_queue_group)
return 0;
#endif
for (i = old_num; i < new_num; i++) {
- error = rx_queue_add_kobject(net, i);
+ error = rx_queue_add_kobject(dev, i);
if (error) {
new_num = old_num;
break;
@@ -824,10 +844,10 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
}
while (--i >= new_num) {
- if (net->sysfs_rx_queue_group)
- sysfs_remove_group(&net->_rx[i].kobj,
- net->sysfs_rx_queue_group);
- kobject_put(&net->_rx[i].kobj);
+ if (dev->sysfs_rx_queue_group)
+ sysfs_remove_group(&dev->_rx[i].kobj,
+ dev->sysfs_rx_queue_group);
+ kobject_put(&dev->_rx[i].kobj);
}
return error;
@@ -1135,13 +1155,13 @@ static struct kobj_type netdev_queue_ktype = {
.namespace = netdev_queue_namespace,
};
-static int netdev_queue_add_kobject(struct net_device *net, int index)
+static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
- struct netdev_queue *queue = net->_tx + index;
+ struct netdev_queue *queue = dev->_tx + index;
struct kobject *kobj = &queue->kobj;
int error = 0;
- kobj->kset = net->queues_kset;
+ kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
"tx-%u", index);
if (error)
@@ -1164,14 +1184,14 @@ exit:
#endif /* CONFIG_SYSFS */
int
-netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
int i;
int error = 0;
for (i = old_num; i < new_num; i++) {
- error = netdev_queue_add_kobject(net, i);
+ error = netdev_queue_add_kobject(dev, i);
if (error) {
new_num = old_num;
break;
@@ -1179,7 +1199,7 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
}
while (--i >= new_num) {
- struct netdev_queue *queue = net->_tx + i;
+ struct netdev_queue *queue = dev->_tx + i;
#ifdef CONFIG_BQL
sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1193,25 +1213,25 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
#endif /* CONFIG_SYSFS */
}
-static int register_queue_kobjects(struct net_device *net)
+static int register_queue_kobjects(struct net_device *dev)
{
int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
#ifdef CONFIG_SYSFS
- net->queues_kset = kset_create_and_add("queues",
- NULL, &net->dev.kobj);
- if (!net->queues_kset)
+ dev->queues_kset = kset_create_and_add("queues",
+ NULL, &dev->dev.kobj);
+ if (!dev->queues_kset)
return -ENOMEM;
- real_rx = net->real_num_rx_queues;
+ real_rx = dev->real_num_rx_queues;
#endif
- real_tx = net->real_num_tx_queues;
+ real_tx = dev->real_num_tx_queues;
- error = net_rx_queue_update_kobjects(net, 0, real_rx);
+ error = net_rx_queue_update_kobjects(dev, 0, real_rx);
if (error)
goto error;
rxq = real_rx;
- error = netdev_queue_update_kobjects(net, 0, real_tx);
+ error = netdev_queue_update_kobjects(dev, 0, real_tx);
if (error)
goto error;
txq = real_tx;
@@ -1219,24 +1239,24 @@ static int register_queue_kobjects(struct net_device *net)
return 0;
error:
- netdev_queue_update_kobjects(net, txq, 0);
- net_rx_queue_update_kobjects(net, rxq, 0);
+ netdev_queue_update_kobjects(dev, txq, 0);
+ net_rx_queue_update_kobjects(dev, rxq, 0);
return error;
}
-static void remove_queue_kobjects(struct net_device *net)
+static void remove_queue_kobjects(struct net_device *dev)
{
int real_rx = 0, real_tx = 0;
#ifdef CONFIG_SYSFS
- real_rx = net->real_num_rx_queues;
+ real_rx = dev->real_num_rx_queues;
#endif
- real_tx = net->real_num_tx_queues;
+ real_tx = dev->real_num_tx_queues;
- net_rx_queue_update_kobjects(net, real_rx, 0);
- netdev_queue_update_kobjects(net, real_tx, 0);
+ net_rx_queue_update_kobjects(dev, real_rx, 0);
+ netdev_queue_update_kobjects(dev, real_tx, 0);
#ifdef CONFIG_SYSFS
- kset_unregister(net->queues_kset);
+ kset_unregister(dev->queues_kset);
#endif
}
@@ -1329,13 +1349,13 @@ static struct class net_class = {
/* Delete sysfs entries but hold kobject reference until after all
* netdev references are gone.
*/
-void netdev_unregister_kobject(struct net_device * net)
+void netdev_unregister_kobject(struct net_device *ndev)
{
- struct device *dev = &(net->dev);
+ struct device *dev = &(ndev->dev);
kobject_get(&dev->kobj);
- remove_queue_kobjects(net);
+ remove_queue_kobjects(ndev);
pm_runtime_set_memalloc_noio(dev, false);
@@ -1343,18 +1363,18 @@ void netdev_unregister_kobject(struct net_device * net)
}
/* Create sysfs entries for network device. */
-int netdev_register_kobject(struct net_device *net)
+int netdev_register_kobject(struct net_device *ndev)
{
- struct device *dev = &(net->dev);
- const struct attribute_group **groups = net->sysfs_groups;
+ struct device *dev = &(ndev->dev);
+ const struct attribute_group **groups = ndev->sysfs_groups;
int error = 0;
device_initialize(dev);
dev->class = &net_class;
- dev->platform_data = net;
+ dev->platform_data = ndev;
dev->groups = groups;
- dev_set_name(dev, "%s", net->name);
+ dev_set_name(dev, "%s", ndev->name);
#ifdef CONFIG_SYSFS
/* Allow for a device specific group */
@@ -1364,10 +1384,10 @@ int netdev_register_kobject(struct net_device *net)
*groups++ = &netstat_group;
#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
- if (net->ieee80211_ptr)
+ if (ndev->ieee80211_ptr)
*groups++ = &wireless_group;
#if IS_ENABLED(CONFIG_WIRELESS_EXT)
- else if (net->wireless_handlers)
+ else if (ndev->wireless_handlers)
*groups++ = &wireless_group;
#endif
#endif
@@ -1377,7 +1397,7 @@ int netdev_register_kobject(struct net_device *net)
if (error)
return error;
- error = register_queue_kobjects(net);
+ error = register_queue_kobjects(ndev);
if (error) {
device_del(dev);
return error;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 85b62691f4f2..7f155175bba8 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -224,7 +224,7 @@ static void net_free(struct net *net)
return;
}
#endif
- kfree(net->gen);
+ kfree(rcu_access_pointer(net->gen));
kmem_cache_free(net_cachep, net);
}
@@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid)
tsk = find_task_by_vpid(pid);
if (tsk) {
struct nsproxy *nsproxy;
- nsproxy = task_nsproxy(tsk);
+ task_lock(tsk);
+ nsproxy = tsk->nsproxy;
if (nsproxy)
net = get_net(nsproxy->net_ns);
+ task_unlock(tsk);
}
rcu_read_unlock();
return net;
@@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task)
struct net *net = NULL;
struct nsproxy *nsproxy;
- rcu_read_lock();
- nsproxy = task_nsproxy(task);
+ task_lock(task);
+ nsproxy = task->nsproxy;
if (nsproxy)
net = get_net(nsproxy->net_ns);
- rcu_read_unlock();
+ task_unlock(task);
return net;
}
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 30d903b19c62..1f2a126f4ffa 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -107,5 +107,5 @@ struct cgroup_subsys net_cls_cgrp_subsys = {
.css_online = cgrp_css_online,
.css_free = cgrp_css_free,
.attach = cgrp_attach,
- .base_cftypes = ss_files,
+ .legacy_cftypes = ss_files,
};
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e33937fb32a0..e6645b4f330a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644);
static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
- const struct net_device_ops *ops = dev->netdev_ops;
int status = NETDEV_TX_OK;
netdev_features_t features;
@@ -92,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
- status = ops->ndo_start_xmit(skb, dev);
- if (status == NETDEV_TX_OK)
- txq_trans_update(txq);
+ status = netdev_start_xmit(skb, dev, txq, false);
out:
return status;
@@ -116,7 +113,7 @@ static void queue_process(struct work_struct *work)
continue;
}
- txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ txq = skb_get_tx_queue(dev, skb);
local_irq_save(flags);
HARD_TX_LOCK(dev, txq, smp_processor_id());
@@ -822,7 +819,8 @@ void __netpoll_cleanup(struct netpoll *np)
RCU_INIT_POINTER(np->dev->npinfo, NULL);
call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
- }
+ } else
+ RCU_INIT_POINTER(np->dev->npinfo, NULL);
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 2f385b9bccc0..cbd0a199bf52 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -249,7 +249,7 @@ struct cgroup_subsys net_prio_cgrp_subsys = {
.css_online = cgrp_css_online,
.css_free = cgrp_css_free,
.attach = net_prio_attach,
- .base_cftypes = ss_files,
+ .legacy_cftypes = ss_files,
};
static int netprio_device_event(struct notifier_block *unused,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fc17a9d309ac..443256bdcddc 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -69,8 +69,9 @@
* for running devices in the if_list and sends packets until count is 0 it
* also the thread checks the thread->control which is used for inter-process
* communication. controlling process "posts" operations to the threads this
- * way. The if_lock should be possible to remove when add/rem_device is merged
- * into this too.
+ * way.
+ * The if_list is RCU protected, and the if_lock remains to protect updating
+ * of if_list, from "add_device" as it invoked from userspace (via proc write).
*
* By design there should only be *one* "controlling" process. In practice
* multiple write accesses gives unpredictable result. Understood by "write"
@@ -201,6 +202,7 @@
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
#define F_NODE (1<<15) /* Node memory alloc*/
#define F_UDPCSUM (1<<16) /* Include UDP checksum */
+#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -208,7 +210,7 @@
#define T_REMDEVALL (1<<2) /* Remove all devs */
#define T_REMDEV (1<<3) /* Remove one dev */
-/* If lock -- can be removed after some work */
+/* If lock -- protects updating of if_list */
#define if_lock(t) spin_lock(&(t->if_lock));
#define if_unlock(t) spin_unlock(&(t->if_lock));
@@ -241,6 +243,7 @@ struct pktgen_dev {
struct proc_dir_entry *entry; /* proc file */
struct pktgen_thread *pg_thread;/* the owner */
struct list_head list; /* chaining in the thread's run-queue */
+ struct rcu_head rcu; /* freed by RCU */
int running; /* if false, the test will stop */
@@ -384,6 +387,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
__u32 skb_priority; /* skb priority field */
+ unsigned int burst; /* number of duplicated packets to burst */
int node; /* Memory node */
#ifdef CONFIG_XFRM
@@ -503,7 +507,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
pktgen_reset_all_threads(pn);
else
- pr_warning("Unknown command: %s\n", data);
+ pr_warn("Unknown command: %s\n", data);
return count;
}
@@ -610,6 +614,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->traffic_class)
seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
+ if (pkt_dev->burst > 1)
+ seq_printf(seq, " burst: %d\n", pkt_dev->burst);
+
if (pkt_dev->node >= 0)
seq_printf(seq, " node: %d\n", pkt_dev->node);
@@ -636,6 +643,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_UDPCSUM)
seq_puts(seq, "UDPCSUM ");
+ if (pkt_dev->flags & F_NO_TIMESTAMP)
+ seq_puts(seq, "NO_TIMESTAMP ");
+
if (pkt_dev->flags & F_MPLS_RND)
seq_puts(seq, "MPLS_RND ");
@@ -802,7 +812,6 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
case '\t':
case ' ':
goto done_str;
- break;
default:
break;
}
@@ -856,14 +865,14 @@ static ssize_t pktgen_if_write(struct file *file,
pg_result = &(pkt_dev->result[0]);
if (count < 1) {
- pr_warning("wrong command format\n");
+ pr_warn("wrong command format\n");
return -EINVAL;
}
max = count;
tmp = count_trail_chars(user_buffer, max);
if (tmp < 0) {
- pr_warning("illegal format\n");
+ pr_warn("illegal format\n");
return tmp;
}
i = tmp;
@@ -1119,6 +1128,16 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->dst_mac_count);
return count;
}
+ if (!strcmp(name, "burst")) {
+ len = num_arg(&user_buffer[i], 10, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+ pkt_dev->burst = value < 1 ? 1 : value;
+ sprintf(pg_result, "OK: burst=%d", pkt_dev->burst);
+ return count;
+ }
if (!strcmp(name, "node")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
@@ -1242,6 +1261,9 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!UDPCSUM") == 0)
pkt_dev->flags &= ~F_UDPCSUM;
+ else if (strcmp(f, "NO_TIMESTAMP") == 0)
+ pkt_dev->flags |= F_NO_TIMESTAMP;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -1250,6 +1272,7 @@ static ssize_t pktgen_if_write(struct file *file,
"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, "
"MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, "
"QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, "
+ "NO_TIMESTAMP, "
#ifdef CONFIG_XFRM
"IPSEC, "
#endif
@@ -1737,14 +1760,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
seq_puts(seq, "Running: ");
- if_lock(t);
- list_for_each_entry(pkt_dev, &t->if_list, list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
seq_puts(seq, "\nStopped: ");
- list_for_each_entry(pkt_dev, &t->if_list, list)
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
if (!pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
@@ -1753,7 +1776,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
else
seq_puts(seq, "\nResult: NA\n");
- if_unlock(t);
+ rcu_read_unlock();
return 0;
}
@@ -1878,10 +1901,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
pkt_dev = pktgen_find_dev(t, ifname, exact);
if (pkt_dev) {
if (remove) {
- if_lock(t);
pkt_dev->removal_mark = 1;
t->control |= T_REMDEV;
- if_unlock(t);
}
break;
}
@@ -1931,7 +1952,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
struct pktgen_dev *pkt_dev;
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
if (pkt_dev->odev != dev)
continue;
@@ -1946,6 +1968,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
dev->name);
break;
}
+ rcu_read_unlock();
}
}
@@ -2047,15 +2070,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
ntxq = pkt_dev->odev->real_num_tx_queues;
if (ntxq <= pkt_dev->queue_map_min) {
- pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
- pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
- pkt_dev->odevname);
+ pr_warn("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+ pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
+ pkt_dev->odevname);
pkt_dev->queue_map_min = (ntxq ?: 1) - 1;
}
if (pkt_dev->queue_map_max >= ntxq) {
- pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
- pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
- pkt_dev->odevname);
+ pr_warn("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+ pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
+ pkt_dev->odevname);
pkt_dev->queue_map_max = (ntxq ?: 1) - 1;
}
@@ -2684,9 +2707,14 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
pgh->pgh_magic = htonl(PKTGEN_MAGIC);
pgh->seq_num = htonl(pkt_dev->seq_num);
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
+ if (pkt_dev->flags & F_NO_TIMESTAMP) {
+ pgh->tv_sec = 0;
+ pgh->tv_usec = 0;
+ } else {
+ do_gettimeofday(&timestamp);
+ pgh->tv_sec = htonl(timestamp.tv_sec);
+ pgh->tv_usec = htonl(timestamp.tv_usec);
+ }
}
static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
@@ -2997,8 +3025,8 @@ static void pktgen_run(struct pktgen_thread *t)
func_enter();
- if_lock(t);
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
/*
* setup odev and create initial packet.
@@ -3007,18 +3035,18 @@ static void pktgen_run(struct pktgen_thread *t)
if (pkt_dev->odev) {
pktgen_clear_counters(pkt_dev);
- pkt_dev->running = 1; /* Cranke yeself! */
pkt_dev->skb = NULL;
pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
set_pkt_overhead(pkt_dev);
strcpy(pkt_dev->result, "Starting");
+ pkt_dev->running = 1; /* Cranke yeself! */
started++;
} else
strcpy(pkt_dev->result, "Error starting");
}
- if_unlock(t);
+ rcu_read_unlock();
if (started)
t->control &= ~(T_STOP);
}
@@ -3041,27 +3069,25 @@ static int thread_is_running(const struct pktgen_thread *t)
{
const struct pktgen_dev *pkt_dev;
- list_for_each_entry(pkt_dev, &t->if_list, list)
- if (pkt_dev->running)
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
+ if (pkt_dev->running) {
+ rcu_read_unlock();
return 1;
+ }
+ rcu_read_unlock();
return 0;
}
static int pktgen_wait_thread_run(struct pktgen_thread *t)
{
- if_lock(t);
-
while (thread_is_running(t)) {
- if_unlock(t);
-
msleep_interruptible(100);
if (signal_pending(current))
goto signal;
- if_lock(t);
}
- if_unlock(t);
return 1;
signal:
return 0;
@@ -3161,15 +3187,15 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
if (!pkt_dev->running) {
- pr_warning("interface: %s is already stopped\n",
- pkt_dev->odevname);
+ pr_warn("interface: %s is already stopped\n",
+ pkt_dev->odevname);
return -EINVAL;
}
+ pkt_dev->running = 0;
kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
pkt_dev->stopped_at = ktime_get();
- pkt_dev->running = 0;
show_results(pkt_dev, nr_frags);
@@ -3180,9 +3206,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
{
struct pktgen_dev *pkt_dev, *best = NULL;
- if_lock(t);
-
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
if (!pkt_dev->running)
continue;
if (best == NULL)
@@ -3190,7 +3215,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
best = pkt_dev;
}
- if_unlock(t);
+ rcu_read_unlock();
+
return best;
}
@@ -3200,13 +3226,13 @@ static void pktgen_stop(struct pktgen_thread *t)
func_enter();
- if_lock(t);
+ rcu_read_lock();
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
pktgen_stop_device(pkt_dev);
}
- if_unlock(t);
+ rcu_read_unlock();
}
/*
@@ -3220,8 +3246,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
func_enter();
- if_lock(t);
-
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
@@ -3235,8 +3259,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
break;
}
-
- if_unlock(t);
}
static void pktgen_rem_all_ifs(struct pktgen_thread *t)
@@ -3248,8 +3270,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
/* Remove all devices, free mem */
- if_lock(t);
-
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
@@ -3258,8 +3278,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
pktgen_remove_device(t, cur);
}
-
- if_unlock(t);
}
static void pktgen_rem_thread(struct pktgen_thread *t)
@@ -3293,11 +3311,9 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
+ unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
- netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *)
- = odev->netdev_ops->ndo_start_xmit;
struct netdev_queue *txq;
- u16 queue_map;
int ret;
/* If device is offline, then don't send */
@@ -3335,8 +3351,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (pkt_dev->delay && pkt_dev->last_ok)
spin(pkt_dev, pkt_dev->next_tx);
- queue_map = skb_get_queue_mapping(pkt_dev->skb);
- txq = netdev_get_tx_queue(odev, queue_map);
+ txq = skb_get_tx_queue(odev, pkt_dev->skb);
local_bh_disable();
@@ -3347,16 +3362,19 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->last_ok = 0;
goto unlock;
}
- atomic_inc(&(pkt_dev->skb->users));
- ret = (*xmit)(pkt_dev->skb, odev);
+ atomic_add(burst, &pkt_dev->skb->users);
+
+xmit_more:
+ ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0);
switch (ret) {
case NETDEV_TX_OK:
- txq_trans_update(txq);
pkt_dev->last_ok = 1;
pkt_dev->sofar++;
pkt_dev->seq_num++;
pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
+ if (burst > 0 && !netif_xmit_frozen_or_drv_stopped(txq))
+ goto xmit_more;
break;
case NET_XMIT_DROP:
case NET_XMIT_CN:
@@ -3375,6 +3393,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
atomic_dec(&(pkt_dev->skb->users));
pkt_dev->last_ok = 0;
}
+ if (unlikely(burst))
+ atomic_sub(burst, &pkt_dev->skb->users);
unlock:
HARD_TX_UNLOCK(odev, txq);
@@ -3407,10 +3427,10 @@ static int pktgen_thread_worker(void *arg)
pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
- set_current_state(TASK_INTERRUPTIBLE);
-
set_freezable();
+ __set_current_state(TASK_RUNNING);
+
while (!kthread_should_stop()) {
pkt_dev = next_to_run(t);
@@ -3424,8 +3444,6 @@ static int pktgen_thread_worker(void *arg)
continue;
}
- __set_current_state(TASK_RUNNING);
-
if (likely(pkt_dev)) {
pktgen_xmit(pkt_dev);
@@ -3456,9 +3474,8 @@ static int pktgen_thread_worker(void *arg)
}
try_to_freeze();
-
- set_current_state(TASK_INTERRUPTIBLE);
}
+ set_current_state(TASK_INTERRUPTIBLE);
pr_debug("%s stopping all device\n", t->tsk->comm);
pktgen_stop(t);
@@ -3485,8 +3502,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
struct pktgen_dev *p, *pkt_dev = NULL;
size_t len = strlen(ifname);
- if_lock(t);
- list_for_each_entry(p, &t->if_list, list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(p, &t->if_list, list)
if (strncmp(p->odevname, ifname, len) == 0) {
if (p->odevname[len]) {
if (exact || p->odevname[len] != '@')
@@ -3496,7 +3513,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
break;
}
- if_unlock(t);
+ rcu_read_unlock();
pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
return pkt_dev;
}
@@ -3510,6 +3527,12 @@ static int add_dev_to_thread(struct pktgen_thread *t,
{
int rv = 0;
+ /* This function cannot be called concurrently, as its called
+ * under pktgen_thread_lock mutex, but it can run from
+ * userspace on another CPU than the kthread. The if_lock()
+ * is used here to sync with concurrent instances of
+ * _rem_dev_from_if_list() invoked via kthread, which is also
+ * updating the if_list */
if_lock(t);
if (pkt_dev->pg_thread) {
@@ -3518,9 +3541,9 @@ static int add_dev_to_thread(struct pktgen_thread *t,
goto out;
}
- list_add(&pkt_dev->list, &t->if_list);
- pkt_dev->pg_thread = t;
pkt_dev->running = 0;
+ pkt_dev->pg_thread = t;
+ list_add_rcu(&pkt_dev->list, &t->if_list);
out:
if_unlock(t);
@@ -3570,6 +3593,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_p = 0;
pkt_dev->svlan_cfi = 0;
pkt_dev->svlan_id = 0xffff;
+ pkt_dev->burst = 1;
pkt_dev->node = -1;
err = pktgen_setup_dev(t->net, pkt_dev, ifname);
@@ -3675,11 +3699,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
struct list_head *q, *n;
struct pktgen_dev *p;
+ if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
p = list_entry(q, struct pktgen_dev, list);
if (p == pkt_dev)
- list_del(&p->list);
+ list_del_rcu(&p->list);
}
+ if_unlock(t);
}
static int pktgen_remove_device(struct pktgen_thread *t,
@@ -3688,7 +3714,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
if (pkt_dev->running) {
- pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
+ pr_warn("WARNING: trying to remove a running interface, stopping it now\n");
pktgen_stop_device(pkt_dev);
}
@@ -3699,20 +3725,22 @@ static int pktgen_remove_device(struct pktgen_thread *t,
pkt_dev->odev = NULL;
}
- /* And update the thread if_list */
-
- _rem_dev_from_if_list(t, pkt_dev);
-
+ /* Remove proc before if_list entry, because add_device uses
+ * list to determine if interface already exist, avoid race
+ * with proc_create_data() */
if (pkt_dev->entry)
proc_remove(pkt_dev->entry);
+ /* And update the thread if_list */
+ _rem_dev_from_if_list(t, pkt_dev);
+
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
#endif
vfree(pkt_dev->flows);
if (pkt_dev->page)
put_page(pkt_dev->page);
- kfree(pkt_dev);
+ kfree_rcu(pkt_dev, rcu);
return 0;
}
@@ -3812,6 +3840,7 @@ static void __exit pg_cleanup(void)
{
unregister_netdevice_notifier(&pktgen_notifier_block);
unregister_pernet_subsys(&pg_net_ops);
+ /* Don't need rcu_barrier() due to use of kfree_rcu() */
}
module_init(pg_init);
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index d3027a73fd4b..4eab4a94a59d 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -52,14 +52,43 @@
* test_8021q:
* jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ?
* ldh [16] ; load inner type
- * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ?
+ * jneq #0x88f7, test_8021q_ipv4 ; ETH_P_1588 ?
* ldb [18] ; load payload
* and #0x8 ; as we don't have ports here, test
* jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
* ldh [18] ; reload payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x40 ; PTP_CLASS_V2_VLAN
+ * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2
+ * ret a ; return PTP class
+ *
+ * ; PTP over UDP over IPv4 over 802.1Q over Ethernet
+ * test_8021q_ipv4:
+ * jneq #0x800, test_8021q_ipv6 ; ETH_P_IP ?
+ * ldb [27] ; load proto
+ * jneq #17, drop_8021q_ipv4 ; IPPROTO_UDP ?
+ * ldh [24] ; load frag offset field
+ * jset #0x1fff, drop_8021q_ipv4; don't allow fragments
+ * ldxb 4*([18]&0xf) ; load IP header len
+ * ldh [x + 20] ; load UDP dst port
+ * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ?
+ * ldh [x + 26] ; load payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
+ * ret a ; return PTP class
+ * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE
+ *
+ * ; PTP over UDP over IPv6 over 802.1Q over Ethernet
+ * test_8021q_ipv6:
+ * jneq #0x86dd, drop_8021q_ipv6 ; ETH_P_IPV6 ?
+ * ldb [24] ; load proto
+ * jneq #17, drop_8021q_ipv6 ; IPPROTO_UDP ?
+ * ldh [60] ; load UDP dst port
+ * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ?
+ * ldh [66] ; load payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
* ret a ; return PTP class
+ * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE
*
* ; PTP over Ethernet
* test_ieee1588:
@@ -78,11 +107,11 @@
#include <linux/filter.h>
#include <linux/ptp_classify.h>
-static struct sk_filter *ptp_insns __read_mostly;
+static struct bpf_prog *ptp_insns __read_mostly;
unsigned int ptp_classify_raw(const struct sk_buff *skb)
{
- return SK_RUN_FILTER(ptp_insns, skb);
+ return BPF_PROG_RUN(ptp_insns, skb);
}
EXPORT_SYMBOL_GPL(ptp_classify_raw);
@@ -113,16 +142,39 @@ void __init ptp_classifier_init(void)
{ 0x44, 0, 0, 0x00000020 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
- { 0x15, 0, 9, 0x00008100 },
+ { 0x15, 0, 32, 0x00008100 },
{ 0x28, 0, 0, 0x00000010 },
- { 0x15, 0, 15, 0x000088f7 },
+ { 0x15, 0, 7, 0x000088f7 },
{ 0x30, 0, 0, 0x00000012 },
{ 0x54, 0, 0, 0x00000008 },
- { 0x15, 0, 12, 0x00000000 },
+ { 0x15, 0, 35, 0x00000000 },
{ 0x28, 0, 0, 0x00000012 },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000040 },
+ { 0x44, 0, 0, 0x00000070 },
+ { 0x16, 0, 0, 0x00000000 },
+ { 0x15, 0, 12, 0x00000800 },
+ { 0x30, 0, 0, 0x0000001b },
+ { 0x15, 0, 9, 0x00000011 },
+ { 0x28, 0, 0, 0x00000018 },
+ { 0x45, 7, 0, 0x00001fff },
+ { 0xb1, 0, 0, 0x00000012 },
+ { 0x48, 0, 0, 0x00000014 },
+ { 0x15, 0, 4, 0x0000013f },
+ { 0x48, 0, 0, 0x0000001a },
+ { 0x54, 0, 0, 0x0000000f },
+ { 0x44, 0, 0, 0x00000050 },
+ { 0x16, 0, 0, 0x00000000 },
+ { 0x06, 0, 0, 0x00000000 },
+ { 0x15, 0, 8, 0x000086dd },
+ { 0x30, 0, 0, 0x00000018 },
+ { 0x15, 0, 6, 0x00000011 },
+ { 0x28, 0, 0, 0x0000003c },
+ { 0x15, 0, 4, 0x0000013f },
+ { 0x28, 0, 0, 0x00000042 },
+ { 0x54, 0, 0, 0x0000000f },
+ { 0x44, 0, 0, 0x00000060 },
{ 0x16, 0, 0, 0x00000000 },
+ { 0x06, 0, 0, 0x00000000 },
{ 0x15, 0, 7, 0x000088f7 },
{ 0x30, 0, 0, 0x0000000e },
{ 0x54, 0, 0, 0x00000008 },
@@ -137,5 +189,5 @@ void __init ptp_classifier_init(void)
.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
};
- BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog));
+ BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog));
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 467f326126e0..04db318e6218 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -41,27 +41,27 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
unsigned int nr_table_entries)
{
size_t lopt_size = sizeof(struct listen_sock);
- struct listen_sock *lopt;
+ struct listen_sock *lopt = NULL;
nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
nr_table_entries = max_t(u32, nr_table_entries, 8);
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
lopt_size += nr_table_entries * sizeof(struct request_sock *);
- if (lopt_size > PAGE_SIZE)
+
+ if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ lopt = kzalloc(lopt_size, GFP_KERNEL |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
+ if (!lopt)
lopt = vzalloc(lopt_size);
- else
- lopt = kzalloc(lopt_size, GFP_KERNEL);
- if (lopt == NULL)
+ if (!lopt)
return -ENOMEM;
- for (lopt->max_qlen_log = 3;
- (1 << lopt->max_qlen_log) < nr_table_entries;
- lopt->max_qlen_log++);
-
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
rwlock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = NULL;
lopt->nr_table_entries = nr_table_entries;
+ lopt->max_qlen_log = ilog2(nr_table_entries);
write_lock_bh(&queue->syn_wait_lock);
queue->listen_opt = lopt;
@@ -72,22 +72,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
void __reqsk_queue_destroy(struct request_sock_queue *queue)
{
- struct listen_sock *lopt;
- size_t lopt_size;
-
- /*
- * this is an error recovery path only
- * no locking needed and the lopt is not NULL
- */
-
- lopt = queue->listen_opt;
- lopt_size = sizeof(struct listen_sock) +
- lopt->nr_table_entries * sizeof(struct request_sock *);
-
- if (lopt_size > PAGE_SIZE)
- vfree(lopt);
- else
- kfree(lopt);
+ /* This is an error recovery path only, no locking needed */
+ kvfree(queue->listen_opt);
}
static inline struct listen_sock *reqsk_queue_yank_listen_sk(
@@ -107,8 +93,6 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
{
/* make all the listen_opt local to us */
struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
- size_t lopt_size = sizeof(struct listen_sock) +
- lopt->nr_table_entries * sizeof(struct request_sock *);
if (lopt->qlen != 0) {
unsigned int i;
@@ -125,10 +109,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
}
WARN_ON(lopt->qlen != 0);
- if (lopt_size > PAGE_SIZE)
- vfree(lopt);
- else
- kfree(lopt);
+ kvfree(lopt);
}
/*
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1063996f8317..a6882686ca3a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -299,7 +299,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
if (rtnl_link_ops_get(ops->kind))
return -EEXIST;
- if (!ops->dellink)
+ /* The check for setup is here because if ops
+ * does not have that filled up, it is not possible
+ * to use the ops for creating device. So do not
+ * fill up dellink as well. That disables rtnl_dellink.
+ */
+ if (ops->setup && !ops->dellink)
ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops);
@@ -799,7 +804,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
(nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
- nla_total_size(sizeof(struct ifla_vf_rate)));
+ nla_total_size(sizeof(struct ifla_vf_rate)) +
+ nla_total_size(sizeof(struct ifla_vf_link_state)));
return size;
} else
return 0;
@@ -1475,9 +1481,12 @@ static int do_set_master(struct net_device *dev, int ifindex)
return 0;
}
+#define DO_SETLINK_MODIFIED 0x01
+/* notify flag means notify + modified. */
+#define DO_SETLINK_NOTIFY 0x03
static int do_setlink(const struct sk_buff *skb,
struct net_device *dev, struct ifinfomsg *ifm,
- struct nlattr **tb, char *ifname, int modified)
+ struct nlattr **tb, char *ifname, int status)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
@@ -1496,7 +1505,7 @@ static int do_setlink(const struct sk_buff *skb,
put_net(net);
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_MAP]) {
@@ -1525,7 +1534,7 @@ static int do_setlink(const struct sk_buff *skb,
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_ADDRESS]) {
@@ -1545,19 +1554,19 @@ static int do_setlink(const struct sk_buff *skb,
kfree(sa);
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_MTU]) {
err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_GROUP]) {
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
/*
@@ -1569,7 +1578,7 @@ static int do_setlink(const struct sk_buff *skb,
err = dev_change_name(dev, ifname);
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_IFALIAS]) {
@@ -1577,7 +1586,7 @@ static int do_setlink(const struct sk_buff *skb,
nla_len(tb[IFLA_IFALIAS]));
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_BROADCAST]) {
@@ -1595,25 +1604,35 @@ static int do_setlink(const struct sk_buff *skb,
err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_CARRIER]) {
err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
- if (tb[IFLA_TXQLEN])
- dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+ if (tb[IFLA_TXQLEN]) {
+ unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
+
+ if (dev->tx_queue_len ^ value)
+ status |= DO_SETLINK_NOTIFY;
+
+ dev->tx_queue_len = value;
+ }
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE]) {
+ unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
+
write_lock_bh(&dev_base_lock);
- dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+ if (dev->link_mode ^ value)
+ status |= DO_SETLINK_NOTIFY;
+ dev->link_mode = value;
write_unlock_bh(&dev_base_lock);
}
@@ -1628,7 +1647,7 @@ static int do_setlink(const struct sk_buff *skb,
err = do_setvfinfo(dev, attr);
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
}
err = 0;
@@ -1658,7 +1677,7 @@ static int do_setlink(const struct sk_buff *skb,
err = ops->ndo_set_vf_port(dev, vf, port);
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
}
err = 0;
@@ -1676,7 +1695,7 @@ static int do_setlink(const struct sk_buff *skb,
err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_AF_SPEC]) {
@@ -1693,15 +1712,20 @@ static int do_setlink(const struct sk_buff *skb,
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
}
err = 0;
errout:
- if (err < 0 && modified)
- net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
- dev->name);
+ if (status & DO_SETLINK_MODIFIED) {
+ if (status & DO_SETLINK_NOTIFY)
+ netdev_state_change(dev);
+
+ if (err < 0)
+ net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
+ dev->name);
+ }
return err;
}
@@ -1777,7 +1801,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
return -ENODEV;
ops = dev->rtnl_link_ops;
- if (!ops)
+ if (!ops || !ops->dellink)
return -EOPNOTSUPP;
ops->dellink(dev, &list_kill);
@@ -1805,7 +1829,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
EXPORT_SYMBOL(rtnl_configure_link);
struct net_device *rtnl_create_link(struct net *net,
- char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
+ char *ifname, unsigned char name_assign_type,
+ const struct rtnl_link_ops *ops, struct nlattr *tb[])
{
int err;
struct net_device *dev;
@@ -1823,8 +1848,8 @@ struct net_device *rtnl_create_link(struct net *net,
num_rx_queues = ops->get_num_rx_queues();
err = -ENOMEM;
- dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup,
- num_tx_queues, num_rx_queues);
+ dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
+ ops->setup, num_tx_queues, num_rx_queues);
if (!dev)
goto err;
@@ -1889,6 +1914,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct nlattr *linkinfo[IFLA_INFO_MAX+1];
+ unsigned char name_assign_type = NET_NAME_USER;
int err;
#ifdef CONFIG_MODULES
@@ -1981,7 +2007,7 @@ replay:
}
if (dev) {
- int modified = 0;
+ int status = 0;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
@@ -1996,7 +2022,7 @@ replay:
err = ops->changelink(dev, tb, data);
if (err < 0)
return err;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
@@ -2007,10 +2033,10 @@ replay:
tb, slave_data);
if (err < 0)
return err;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
- return do_setlink(skb, dev, ifm, tb, ifname, modified);
+ return do_setlink(skb, dev, ifm, tb, ifname, status);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2038,14 +2064,19 @@ replay:
return -EOPNOTSUPP;
}
- if (!ifname[0])
+ if (!ops->setup)
+ return -EOPNOTSUPP;
+
+ if (!ifname[0]) {
snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+ name_assign_type = NET_NAME_ENUM;
+ }
dest_net = rtnl_link_get_net(net, tb);
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
- dev = rtnl_create_link(dest_net, ifname, ops, tb);
+ dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out;
@@ -2380,22 +2411,20 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
struct net_device *dev,
const unsigned char *addr)
{
- int err = -EOPNOTSUPP;
+ int err = -EINVAL;
/* If aging addresses are supported device will need to
* implement its own handler for this.
*/
if (!(ndm->ndm_state & NUD_PERMANENT)) {
pr_info("%s: FDB only supports static addresses\n", dev->name);
- return -EINVAL;
+ return err;
}
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
err = dev_uc_del(dev, addr);
else if (is_multicast_ether_addr(addr))
err = dev_mc_del(dev, addr);
- else
- err = -EINVAL;
return err;
}
@@ -2509,6 +2538,7 @@ skip:
int ndo_dflt_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
+ struct net_device *filter_dev,
int idx)
{
int err;
@@ -2526,28 +2556,72 @@ EXPORT_SYMBOL(ndo_dflt_fdb_dump);
static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int idx = 0;
- struct net *net = sock_net(skb->sk);
struct net_device *dev;
+ struct nlattr *tb[IFLA_MAX+1];
+ struct net_device *bdev = NULL;
+ struct net_device *br_dev = NULL;
+ const struct net_device_ops *ops = NULL;
+ const struct net_device_ops *cops = NULL;
+ struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
+ struct net *net = sock_net(skb->sk);
+ int brport_idx = 0;
+ int br_idx = 0;
+ int idx = 0;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- if (dev->priv_flags & IFF_BRIDGE_PORT) {
- struct net_device *br_dev;
- const struct net_device_ops *ops;
+ if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
+ ifla_policy) == 0) {
+ if (tb[IFLA_MASTER])
+ br_idx = nla_get_u32(tb[IFLA_MASTER]);
+ }
+
+ brport_idx = ifm->ifi_index;
+
+ if (br_idx) {
+ br_dev = __dev_get_by_index(net, br_idx);
+ if (!br_dev)
+ return -ENODEV;
- br_dev = netdev_master_upper_dev_get(dev);
- ops = br_dev->netdev_ops;
- if (ops->ndo_fdb_dump)
- idx = ops->ndo_fdb_dump(skb, cb, dev, idx);
+ ops = br_dev->netdev_ops;
+ bdev = br_dev;
+ }
+
+ for_each_netdev(net, dev) {
+ if (brport_idx && (dev->ifindex != brport_idx))
+ continue;
+
+ if (!br_idx) { /* user did not specify a specific bridge */
+ if (dev->priv_flags & IFF_BRIDGE_PORT) {
+ br_dev = netdev_master_upper_dev_get(dev);
+ cops = br_dev->netdev_ops;
+ }
+
+ bdev = dev;
+ } else {
+ if (dev != br_dev &&
+ !(dev->priv_flags & IFF_BRIDGE_PORT))
+ continue;
+
+ if (br_dev != netdev_master_upper_dev_get(dev) &&
+ !(dev->priv_flags & IFF_EBRIDGE))
+ continue;
+
+ bdev = br_dev;
+ cops = ops;
}
+ if (dev->priv_flags & IFF_BRIDGE_PORT) {
+ if (cops && cops->ndo_fdb_dump)
+ idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
+ idx);
+ }
+
+ idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
if (dev->netdev_ops->ndo_fdb_dump)
- idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx);
- else
- idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
+ idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev,
+ idx);
+
+ cops = NULL;
}
- rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index ba71212f0251..51dd3193a33e 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq)
* overlaps less than one time per MSL (2 minutes).
* Choosing a clock of 64 ns period is OK. (period of 274 s)
*/
- return seq + (ktime_to_ns(ktime_get_real()) >> 6);
+ return seq + (ktime_get_real_ns() >> 6);
}
#endif
@@ -135,7 +135,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
md5_transform(hash, net_secret);
seq = hash[0] | (((u64)hash[1]) << 32);
- seq += ktime_to_ns(ktime_get_real());
+ seq += ktime_get_real_ns();
seq &= (1ull << 48) - 1;
return seq;
@@ -163,7 +163,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
md5_transform(hash, secret);
seq = hash[0] | (((u64)hash[1]) << 32);
- seq += ktime_to_ns(ktime_get_real());
+ seq += ktime_get_real_ns();
seq &= (1ull << 48) - 1;
return seq;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c1a33033cbe2..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -62,6 +62,7 @@
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
#include <linux/prefetch.h>
+#include <linux/if_vlan.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -256,16 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
kmemcheck_annotate_variable(shinfo->destructor_arg);
if (flags & SKB_ALLOC_FCLONE) {
- struct sk_buff *child = skb + 1;
- atomic_t *fclone_ref = (atomic_t *) (child + 1);
+ struct sk_buff_fclones *fclones;
- kmemcheck_annotate_bitfield(child, flags1);
- kmemcheck_annotate_bitfield(child, flags2);
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+ kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
- atomic_set(fclone_ref, 1);
+ atomic_set(&fclones->fclone_ref, 1);
- child->fclone = SKB_FCLONE_UNAVAILABLE;
- child->pfmemalloc = pfmemalloc;
+ fclones->skb2.fclone = SKB_FCLONE_FREE;
+ fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
return skb;
@@ -359,18 +360,29 @@ refill:
goto end;
}
nc->frag.size = PAGE_SIZE << order;
-recycle:
- atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+ /* Even if we own the page, we do not use atomic_set().
+ * This would break get_page_unless_zero() users.
+ */
+ atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+ &nc->frag.page->_count);
nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
nc->frag.offset = 0;
}
if (nc->frag.offset + fragsz > nc->frag.size) {
- /* avoid unnecessary locked operations if possible */
- if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
- atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
- goto recycle;
- goto refill;
+ if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+ if (!atomic_sub_and_test(nc->pagecnt_bias,
+ &nc->frag.page->_count))
+ goto refill;
+ /* OK, page count is 0, we can safely set it */
+ atomic_set(&nc->frag.page->_count,
+ NETDEV_PAGECNT_MAX_BIAS);
+ } else {
+ atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+ &nc->frag.page->_count);
+ }
+ nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+ nc->frag.offset = 0;
}
data = page_address(nc->frag.page) + nc->frag.offset;
@@ -490,32 +502,33 @@ static void skb_free_head(struct sk_buff *skb)
static void skb_release_data(struct sk_buff *skb)
{
- if (!skb->cloned ||
- !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
- &skb_shinfo(skb)->dataref)) {
- if (skb_shinfo(skb)->nr_frags) {
- int i;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- skb_frag_unref(skb, i);
- }
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int i;
- /*
- * If skb buf is from userspace, we need to notify the caller
- * the lower device DMA has done;
- */
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
- struct ubuf_info *uarg;
+ if (skb->cloned &&
+ atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+ &shinfo->dataref))
+ return;
- uarg = skb_shinfo(skb)->destructor_arg;
- if (uarg->callback)
- uarg->callback(uarg, true);
- }
+ for (i = 0; i < shinfo->nr_frags; i++)
+ __skb_frag_unref(&shinfo->frags[i]);
- if (skb_has_frag_list(skb))
- skb_drop_fraglist(skb);
+ /*
+ * If skb buf is from userspace, we need to notify the caller
+ * the lower device DMA has done;
+ */
+ if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ struct ubuf_info *uarg;
- skb_free_head(skb);
+ uarg = shinfo->destructor_arg;
+ if (uarg->callback)
+ uarg->callback(uarg, true);
}
+
+ if (shinfo->frag_list)
+ kfree_skb_list(shinfo->frag_list);
+
+ skb_free_head(skb);
}
/*
@@ -523,8 +536,7 @@ static void skb_release_data(struct sk_buff *skb)
*/
static void kfree_skbmem(struct sk_buff *skb)
{
- struct sk_buff *other;
- atomic_t *fclone_ref;
+ struct sk_buff_fclones *fclones;
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
@@ -532,22 +544,28 @@ static void kfree_skbmem(struct sk_buff *skb)
break;
case SKB_FCLONE_ORIG:
- fclone_ref = (atomic_t *) (skb + 2);
- if (atomic_dec_and_test(fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, skb);
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+ if (atomic_dec_and_test(&fclones->fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, fclones);
break;
case SKB_FCLONE_CLONE:
- fclone_ref = (atomic_t *) (skb + 1);
- other = skb - 1;
+ fclones = container_of(skb, struct sk_buff_fclones, skb2);
- /* The clone portion is available for
- * fast-cloning again.
+ /* Warning : We must perform the atomic_dec_and_test() before
+ * setting skb->fclone back to SKB_FCLONE_FREE, otherwise
+ * skb_clone() could set clone_ref to 2 before our decrement.
+ * Anyway, if we are going to free the structure, no need to
+ * rewrite skb->fclone.
*/
- skb->fclone = SKB_FCLONE_UNAVAILABLE;
-
- if (atomic_dec_and_test(fclone_ref))
- kmem_cache_free(skbuff_fclone_cache, other);
+ if (atomic_dec_and_test(&fclones->fclone_ref)) {
+ kmem_cache_free(skbuff_fclone_cache, fclones);
+ } else {
+ /* The clone portion is available for
+ * fast-cloning again.
+ */
+ skb->fclone = SKB_FCLONE_FREE;
+ }
break;
}
}
@@ -565,7 +583,7 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
#endif
/* XXX: IS this still necessary? - JHS */
@@ -673,57 +691,61 @@ void consume_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(consume_skb);
+/* Make sure a field is enclosed inside headers_start/headers_end section */
+#define CHECK_SKB_FIELD(field) \
+ BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
+ offsetof(struct sk_buff, headers_start)); \
+ BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
+ offsetof(struct sk_buff, headers_end)); \
+
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
new->tstamp = old->tstamp;
+ /* We do not copy old->sk */
new->dev = old->dev;
- new->transport_header = old->transport_header;
- new->network_header = old->network_header;
- new->mac_header = old->mac_header;
- new->inner_protocol = old->inner_protocol;
- new->inner_transport_header = old->inner_transport_header;
- new->inner_network_header = old->inner_network_header;
- new->inner_mac_header = old->inner_mac_header;
+ memcpy(new->cb, old->cb, sizeof(old->cb));
skb_dst_copy(new, old);
- skb_copy_hash(new, old);
- new->ooo_okay = old->ooo_okay;
- new->no_fcs = old->no_fcs;
- new->encapsulation = old->encapsulation;
- new->encap_hdr_csum = old->encap_hdr_csum;
- new->csum_valid = old->csum_valid;
- new->csum_complete_sw = old->csum_complete_sw;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
- memcpy(new->cb, old->cb, sizeof(old->cb));
- new->csum = old->csum;
- new->ignore_df = old->ignore_df;
- new->pkt_type = old->pkt_type;
- new->ip_summed = old->ip_summed;
- skb_copy_queue_mapping(new, old);
- new->priority = old->priority;
-#if IS_ENABLED(CONFIG_IP_VS)
- new->ipvs_property = old->ipvs_property;
+ __nf_copy(new, old, false);
+
+ /* Note : this field could be in headers_start/headers_end section
+ * It is not yet because we do not want to have a 16 bit hole
+ */
+ new->queue_mapping = old->queue_mapping;
+
+ memcpy(&new->headers_start, &old->headers_start,
+ offsetof(struct sk_buff, headers_end) -
+ offsetof(struct sk_buff, headers_start));
+ CHECK_SKB_FIELD(protocol);
+ CHECK_SKB_FIELD(csum);
+ CHECK_SKB_FIELD(hash);
+ CHECK_SKB_FIELD(priority);
+ CHECK_SKB_FIELD(skb_iif);
+ CHECK_SKB_FIELD(vlan_proto);
+ CHECK_SKB_FIELD(vlan_tci);
+ CHECK_SKB_FIELD(transport_header);
+ CHECK_SKB_FIELD(network_header);
+ CHECK_SKB_FIELD(mac_header);
+ CHECK_SKB_FIELD(inner_protocol);
+ CHECK_SKB_FIELD(inner_transport_header);
+ CHECK_SKB_FIELD(inner_network_header);
+ CHECK_SKB_FIELD(inner_mac_header);
+ CHECK_SKB_FIELD(mark);
+#ifdef CONFIG_NETWORK_SECMARK
+ CHECK_SKB_FIELD(secmark);
+#endif
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ CHECK_SKB_FIELD(napi_id);
#endif
- new->pfmemalloc = old->pfmemalloc;
- new->protocol = old->protocol;
- new->mark = old->mark;
- new->skb_iif = old->skb_iif;
- __nf_copy(new, old);
#ifdef CONFIG_NET_SCHED
- new->tc_index = old->tc_index;
+ CHECK_SKB_FIELD(tc_index);
#ifdef CONFIG_NET_CLS_ACT
- new->tc_verd = old->tc_verd;
+ CHECK_SKB_FIELD(tc_verd);
#endif
#endif
- new->vlan_proto = old->vlan_proto;
- new->vlan_tci = old->vlan_tci;
-
- skb_copy_secmark(new, old);
-#ifdef CONFIG_NET_RX_BUSY_POLL
- new->napi_id = old->napi_id;
-#endif
}
/*
@@ -854,17 +876,22 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
- struct sk_buff *n;
+ struct sk_buff_fclones *fclones = container_of(skb,
+ struct sk_buff_fclones,
+ skb1);
+ struct sk_buff *n = &fclones->skb2;
if (skb_orphan_frags(skb, gfp_mask))
return NULL;
- n = skb + 1;
if (skb->fclone == SKB_FCLONE_ORIG &&
- n->fclone == SKB_FCLONE_UNAVAILABLE) {
- atomic_t *fclone_ref = (atomic_t *) (n + 1);
+ n->fclone == SKB_FCLONE_FREE) {
n->fclone = SKB_FCLONE_CLONE;
- atomic_inc(fclone_ref);
+ /* As our fastclone was free, clone_ref must be 1 at this point.
+ * We could use atomic_inc() here, but it is faster
+ * to set the final value.
+ */
+ atomic_set(&fclones->fclone_ref, 2);
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
@@ -874,7 +901,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
return NULL;
kmemcheck_annotate_bitfield(n, flags1);
- kmemcheck_annotate_bitfield(n, flags2);
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
@@ -2646,7 +2672,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
* skb_seq_read() will return the remaining part of the block.
*
* Note 1: The size of each block of data returned can be arbitrary,
- * this limitation is the cost for zerocopy seqeuental
+ * this limitation is the cost for zerocopy sequential
* reads of potentially non linear data.
*
* Note 2: Fragment lists within fragments are not implemented
@@ -2780,7 +2806,7 @@ EXPORT_SYMBOL(skb_find_text);
/**
* skb_append_datato_frags - append the user data to a skb
* @sk: sock structure
- * @skb: skb structure to be appened with user data.
+ * @skb: skb structure to be appended with user data.
* @getfrag: call back function to be used for getting the user data
* @from: pointer to user message iov
* @length: length of the iov message
@@ -2976,9 +3002,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
tail = nskb;
__copy_skb_header(nskb, head_skb);
- nskb->mac_len = head_skb->mac_len;
skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
+ skb_reset_mac_len(nskb);
skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
nskb->data - tnl_hlen,
@@ -3068,6 +3094,11 @@ perform_csum_check:
}
} while ((offset += len) < head_skb->len);
+ /* Some callers want to get the end of the list.
+ * Put it in segs->prev to avoid walking the list.
+ * (see validate_xmit_skb_list() for example)
+ */
+ segs->prev = tail;
return segs;
err:
@@ -3151,6 +3182,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
+ /* switch back to head shinfo */
+ pinfo = skb_shinfo(p);
+
if (pinfo->frag_list)
goto merge;
if (skb_gro_len(p) != pinfo->gso_size)
@@ -3178,7 +3212,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
skb_shinfo(nskb)->frag_list = p;
skb_shinfo(nskb)->gso_size = pinfo->gso_size;
pinfo->gso_size = 0;
- skb_header_release(p);
+ __skb_header_release(p);
NAPI_GRO_CB(nskb)->last = p;
nskb->data_len += p->len;
@@ -3210,7 +3244,7 @@ merge:
else
NAPI_GRO_CB(p)->last->next = skb;
NAPI_GRO_CB(p)->last = skb;
- skb_header_release(skb);
+ __skb_header_release(skb);
lp = p;
done:
@@ -3226,7 +3260,6 @@ done:
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
-EXPORT_SYMBOL_GPL(skb_gro_receive);
void __init skb_init(void)
{
@@ -3236,8 +3269,7 @@ void __init skb_init(void)
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
- (2*sizeof(struct sk_buff)) +
- sizeof(atomic_t),
+ sizeof(struct sk_buff_fclones),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
@@ -3490,43 +3522,127 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_queue_err_skb);
-void skb_tstamp_tx(struct sk_buff *orig_skb,
- struct skb_shared_hwtstamps *hwtstamps)
+struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
{
- struct sock *sk = orig_skb->sk;
- struct sock_exterr_skb *serr;
- struct sk_buff *skb;
- int err;
+ struct sk_buff_head *q = &sk->sk_error_queue;
+ struct sk_buff *skb, *skb_next;
+ int err = 0;
- if (!sk)
- return;
+ spin_lock_bh(&q->lock);
+ skb = __skb_dequeue(q);
+ if (skb && (skb_next = skb_peek(q)))
+ err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
+ spin_unlock_bh(&q->lock);
- if (hwtstamps) {
- *skb_hwtstamps(orig_skb) =
- *hwtstamps;
- } else {
- /*
- * no hardware time stamps available,
- * so keep the shared tx_flags and only
- * store software time stamp
- */
- orig_skb->tstamp = ktime_get_real();
+ sk->sk_err = err;
+ if (err)
+ sk->sk_error_report(sk);
+
+ return skb;
+}
+EXPORT_SYMBOL(sock_dequeue_err_skb);
+
+/**
+ * skb_clone_sk - create clone of skb, and take reference to socket
+ * @skb: the skb to clone
+ *
+ * This function creates a clone of a buffer that holds a reference on
+ * sk_refcnt. Buffers created via this function are meant to be
+ * returned using sock_queue_err_skb, or free via kfree_skb.
+ *
+ * When passing buffers allocated with this function to sock_queue_err_skb
+ * it is necessary to wrap the call with sock_hold/sock_put in order to
+ * prevent the socket from being released prior to being enqueued on
+ * the sk_error_queue.
+ */
+struct sk_buff *skb_clone_sk(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ struct sk_buff *clone;
+
+ if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
+ return NULL;
+
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (!clone) {
+ sock_put(sk);
+ return NULL;
}
- skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (!skb)
- return;
+ clone->sk = sk;
+ clone->destructor = sock_efree;
+
+ return clone;
+}
+EXPORT_SYMBOL(skb_clone_sk);
+
+static void __skb_complete_tx_timestamp(struct sk_buff *skb,
+ struct sock *sk,
+ int tstype)
+{
+ struct sock_exterr_skb *serr;
+ int err;
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ serr->ee.ee_info = tstype;
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+ serr->ee.ee_data = skb_shinfo(skb)->tskey;
+ if (sk->sk_protocol == IPPROTO_TCP)
+ serr->ee.ee_data -= sk->sk_tskey;
+ }
err = sock_queue_err_skb(sk, skb);
if (err)
kfree_skb(skb);
}
+
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ struct sock *sk = skb->sk;
+
+ /* take a reference to prevent skb_orphan() from freeing the socket */
+ sock_hold(sk);
+
+ *skb_hwtstamps(skb) = *hwtstamps;
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+
+ sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+
+void __skb_tstamp_tx(struct sk_buff *orig_skb,
+ struct skb_shared_hwtstamps *hwtstamps,
+ struct sock *sk, int tstype)
+{
+ struct sk_buff *skb;
+
+ if (!sk)
+ return;
+
+ if (hwtstamps)
+ *skb_hwtstamps(orig_skb) = *hwtstamps;
+ else
+ orig_skb->tstamp = ktime_get_real();
+
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ __skb_complete_tx_timestamp(skb, sk, tstype);
+}
+EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
+
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
+ SCM_TSTAMP_SND);
+}
EXPORT_SYMBOL_GPL(skb_tstamp_tx);
void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
@@ -3543,9 +3659,14 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+ /* take a reference to prevent skb_orphan() from freeing the socket */
+ sock_hold(sk);
+
err = sock_queue_err_skb(sk, skb);
if (err)
kfree_skb(skb);
+
+ sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
@@ -3846,7 +3967,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return false;
if (len <= skb_tailroom(to)) {
- BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
+ if (len)
+ BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
*delta_truesize = 0;
return true;
}
@@ -3959,3 +4081,133 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
return shinfo->gso_size;
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+
+static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
+{
+ if (skb_cow(skb, skb_headroom(skb)) < 0) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ skb->mac_header += VLAN_HLEN;
+ return skb;
+}
+
+struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
+{
+ struct vlan_hdr *vhdr;
+ u16 vlan_tci;
+
+ if (unlikely(vlan_tx_tag_present(skb))) {
+ /* vlan_tci is already set-up so leave this for another time */
+ return skb;
+ }
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ goto err_free;
+
+ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+ goto err_free;
+
+ vhdr = (struct vlan_hdr *)skb->data;
+ vlan_tci = ntohs(vhdr->h_vlan_TCI);
+ __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
+
+ skb_pull_rcsum(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vhdr);
+
+ skb = skb_reorder_vlan_header(skb);
+ if (unlikely(!skb))
+ goto err_free;
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_reset_mac_len(skb);
+
+ return skb;
+
+err_free:
+ kfree_skb(skb);
+ return NULL;
+}
+EXPORT_SYMBOL(skb_vlan_untag);
+
+/**
+ * alloc_skb_with_frags - allocate skb with page frags
+ *
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
+ *
+ * This can be used to allocate a paged skb, given a maximal order for frags.
+ */
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+ unsigned long data_len,
+ int max_page_order,
+ int *errcode,
+ gfp_t gfp_mask)
+{
+ int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ unsigned long chunk;
+ struct sk_buff *skb;
+ struct page *page;
+ gfp_t gfp_head;
+ int i;
+
+ *errcode = -EMSGSIZE;
+ /* Note this test could be relaxed, if we succeed to allocate
+ * high order pages...
+ */
+ if (npages > MAX_SKB_FRAGS)
+ return NULL;
+
+ gfp_head = gfp_mask;
+ if (gfp_head & __GFP_WAIT)
+ gfp_head |= __GFP_REPEAT;
+
+ *errcode = -ENOBUFS;
+ skb = alloc_skb(header_len, gfp_head);
+ if (!skb)
+ return NULL;
+
+ skb->truesize += npages << PAGE_SHIFT;
+
+ for (i = 0; npages > 0; i++) {
+ int order = max_page_order;
+
+ while (order) {
+ if (npages >= 1 << order) {
+ page = alloc_pages(gfp_mask |
+ __GFP_COMP |
+ __GFP_NOWARN |
+ __GFP_NORETRY,
+ order);
+ if (page)
+ goto fill_page;
+ /* Do not retry other high order allocations */
+ order = 1;
+ max_page_order = 0;
+ }
+ order--;
+ }
+ page = alloc_page(gfp_mask);
+ if (!page)
+ goto failure;
+fill_page:
+ chunk = min_t(unsigned long, data_len,
+ PAGE_SIZE << order);
+ skb_fill_page_desc(skb, i, page, 0, chunk);
+ data_len -= chunk;
+ npages -= 1 << order;
+ }
+ return skb;
+
+failure:
+ kfree_skb(skb);
+ return NULL;
+}
+EXPORT_SYMBOL(alloc_skb_with_frags);
diff --git a/net/core/sock.c b/net/core/sock.c
index 026e01f70274..b4f3ea2fce60 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(sk_ns_capable);
/**
* sk_capable - Socket global capability test
* @sk: Socket to use a capability on or through
- * @cap: The global capbility to use
+ * @cap: The global capability to use
*
* Test to see if the opener of the socket had when the socket was
* created and the current process has the capability @cap in all user
@@ -183,7 +183,7 @@ EXPORT_SYMBOL(sk_capable);
* @sk: Socket to use a capability on or through
* @cap: The capability to use
*
- * Test to see if the opener of the socket had when the socke was created
+ * Test to see if the opener of the socket had when the socket was created
* and the current process has the capability @cap over the network namespace
* the socket is a member of.
*/
@@ -437,7 +437,6 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int err;
- int skb_len;
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
@@ -459,13 +458,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
skb->dev = NULL;
skb_set_owner_r(skb, sk);
- /* Cache the SKB length before we tack it onto the receive
- * queue. Once it is added it no longer belongs to us and
- * may be freed by other threads of control pulling packets
- * from the queue.
- */
- skb_len = skb->len;
-
/* we escape from rcu protected region, make sure we dont leak
* a norefcounted dst
*/
@@ -491,7 +483,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
skb->dev = NULL;
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
@@ -848,24 +840,25 @@ set_rcvbuf:
ret = -EINVAL;
break;
}
- sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
- val & SOF_TIMESTAMPING_TX_HARDWARE);
- sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
- val & SOF_TIMESTAMPING_TX_SOFTWARE);
- sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
- val & SOF_TIMESTAMPING_RX_HARDWARE);
+ if (val & SOF_TIMESTAMPING_OPT_ID &&
+ !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
+ ret = -EINVAL;
+ break;
+ }
+ sk->sk_tskey = tcp_sk(sk)->snd_una;
+ } else {
+ sk->sk_tskey = 0;
+ }
+ }
+ sk->sk_tsflags = val;
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE);
else
sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
- sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
- val & SOF_TIMESTAMPING_SOFTWARE);
- sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
- val & SOF_TIMESTAMPING_SYS_HARDWARE);
- sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
- val & SOF_TIMESTAMPING_RAW_HARDWARE);
break;
case SO_RCVLOWAT:
@@ -1091,21 +1084,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_TIMESTAMPING:
- v.val = 0;
- if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
- v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
- if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
- v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
- if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
- v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
- if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
- v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
- if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
- v.val |= SOF_TIMESTAMPING_SOFTWARE;
- if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
- v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
- if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
- v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ v.val = sk->sk_tsflags;
break;
case SO_RCVTIMEO:
@@ -1478,6 +1457,7 @@ static void sk_update_clone(const struct sock *sk, struct sock *newsk)
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
struct sock *newsk;
+ bool is_charged = true;
newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
if (newsk != NULL) {
@@ -1501,9 +1481,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_omem_alloc, 0);
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
-#ifdef CONFIG_NET_DMA
- skb_queue_head_init(&newsk->sk_async_wait_queue);
-#endif
spin_lock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
@@ -1522,9 +1499,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
- sk_filter_charge(newsk, filter);
+ /* though it's an empty new sock, the charging may fail
+ * if sysctl_optmem_max was changed between creation of
+ * original socket and cloning
+ */
+ is_charged = sk_filter_charge(newsk, filter);
- if (unlikely(xfrm_sk_clone_policy(newsk))) {
+ if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
@@ -1653,18 +1634,24 @@ void sock_rfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_rfree);
+void sock_efree(struct sk_buff *skb)
+{
+ sock_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_efree);
+
+#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
-#ifdef CONFIG_INET
if (sk->sk_state == TCP_TIME_WAIT)
inet_twsk_put(inet_twsk(sk));
else
-#endif
sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);
+#endif
kuid_t sock_i_uid(struct sock *sk)
{
@@ -1772,21 +1759,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
int *errcode, int max_page_order)
{
- struct sk_buff *skb = NULL;
- unsigned long chunk;
- gfp_t gfp_mask;
+ struct sk_buff *skb;
long timeo;
int err;
- int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
- struct page *page;
- int i;
-
- err = -EMSGSIZE;
- if (npages > MAX_SKB_FRAGS)
- goto failure;
timeo = sock_sndtimeo(sk, noblock);
- while (!skb) {
+ for (;;) {
err = sock_error(sk);
if (err != 0)
goto failure;
@@ -1795,63 +1773,27 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- err = -EAGAIN;
- if (!timeo)
- goto failure;
- if (signal_pending(current))
- goto interrupted;
- timeo = sock_wait_for_wmem(sk, timeo);
- continue;
- }
-
- err = -ENOBUFS;
- gfp_mask = sk->sk_allocation;
- if (gfp_mask & __GFP_WAIT)
- gfp_mask |= __GFP_REPEAT;
+ if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+ break;
- skb = alloc_skb(header_len, gfp_mask);
- if (!skb)
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = -EAGAIN;
+ if (!timeo)
goto failure;
-
- skb->truesize += data_len;
-
- for (i = 0; npages > 0; i++) {
- int order = max_page_order;
-
- while (order) {
- if (npages >= 1 << order) {
- page = alloc_pages(sk->sk_allocation |
- __GFP_COMP |
- __GFP_NOWARN |
- __GFP_NORETRY,
- order);
- if (page)
- goto fill_page;
- }
- order--;
- }
- page = alloc_page(sk->sk_allocation);
- if (!page)
- goto failure;
-fill_page:
- chunk = min_t(unsigned long, data_len,
- PAGE_SIZE << order);
- skb_fill_page_desc(skb, i, page, 0, chunk);
- data_len -= chunk;
- npages -= 1 << order;
- }
+ if (signal_pending(current))
+ goto interrupted;
+ timeo = sock_wait_for_wmem(sk, timeo);
}
-
- skb_set_owner_w(skb, sk);
+ skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
+ errcode, sk->sk_allocation);
+ if (skb)
+ skb_set_owner_w(skb, sk);
return skb;
interrupted:
err = sock_intr_errno(timeo);
failure:
- kfree_skb(skb);
*errcode = err;
return NULL;
}
@@ -1871,16 +1813,14 @@ EXPORT_SYMBOL(sock_alloc_send_skb);
* skb_page_frag_refill - check that a page_frag contains enough room
* @sz: minimum size of the fragment we want to get
* @pfrag: pointer to page_frag
- * @prio: priority for memory allocation
+ * @gfp: priority for memory allocation
*
* Note: While this allocator tries to use high order pages, there is
* no guarantee that allocations succeed. Therefore, @sz MUST be
* less or equal than PAGE_SIZE.
*/
-bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
- int order;
-
if (pfrag->page) {
if (atomic_read(&pfrag->page->_count) == 1) {
pfrag->offset = 0;
@@ -1891,20 +1831,21 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
put_page(pfrag->page);
}
- order = SKB_FRAG_PAGE_ORDER;
- do {
- gfp_t gfp = prio;
-
- if (order)
- gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
- pfrag->page = alloc_pages(gfp, order);
+ pfrag->offset = 0;
+ if (SKB_FRAG_PAGE_ORDER) {
+ pfrag->page = alloc_pages(gfp | __GFP_COMP |
+ __GFP_NOWARN | __GFP_NORETRY,
+ SKB_FRAG_PAGE_ORDER);
if (likely(pfrag->page)) {
- pfrag->offset = 0;
- pfrag->size = PAGE_SIZE << order;
+ pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
return true;
}
- } while (--order >= 0);
-
+ }
+ pfrag->page = alloc_page(gfp);
+ if (likely(pfrag->page)) {
+ pfrag->size = PAGE_SIZE;
+ return true;
+ }
return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);
@@ -2314,9 +2255,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
skb_queue_head_init(&sk->sk_receive_queue);
skb_queue_head_init(&sk->sk_write_queue);
skb_queue_head_init(&sk->sk_error_queue);
-#ifdef CONFIG_NET_DMA
- skb_queue_head_init(&sk->sk_async_wait_queue);
-#endif
sk->sk_send_head = NULL;
@@ -2504,11 +2442,11 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
int level, int type)
{
struct sock_exterr_skb *serr;
- struct sk_buff *skb, *skb2;
+ struct sk_buff *skb;
int copied, err;
err = -EAGAIN;
- skb = skb_dequeue(&sk->sk_error_queue);
+ skb = sock_dequeue_err_skb(sk);
if (skb == NULL)
goto out;
@@ -2529,16 +2467,6 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
msg->msg_flags |= MSG_ERRQUEUE;
err = copied;
- /* Reset and regenerate socket error */
- spin_lock_bh(&sk->sk_error_queue.lock);
- sk->sk_err = 0;
- if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
- sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
- spin_unlock_bh(&sk->sk_error_queue.lock);
- sk->sk_error_report(sk);
- } else
- spin_unlock_bh(&sk->sk_error_queue.lock);
-
out_free_skb:
kfree_skb(skb);
out:
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a4216a4c9572..ad704c757bb4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -68,8 +68,8 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
if (!filter)
goto out;
- fprog = filter->orig_prog;
- flen = sk_filter_proglen(fprog);
+ fprog = filter->prog->orig_prog;
+ flen = bpf_classic_proglen(fprog);
attr = nla_reserve(skb, attrtype, flen);
if (attr == NULL) {
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 6521dfd8b7c8..43d3dd62fcc8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -36,71 +36,25 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
{
struct phy_device *phydev;
struct sk_buff *clone;
- struct sock *sk = skb->sk;
unsigned int type;
- if (!sk)
+ if (!skb->sk)
return;
type = classify(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
- switch (type) {
- case PTP_CLASS_V1_IPV4:
- case PTP_CLASS_V1_IPV6:
- case PTP_CLASS_V2_IPV4:
- case PTP_CLASS_V2_IPV6:
- case PTP_CLASS_V2_L2:
- case PTP_CLASS_V2_VLAN:
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->txtstamp)) {
- if (!atomic_inc_not_zero(&sk->sk_refcnt))
- return;
-
- clone = skb_clone(skb, GFP_ATOMIC);
- if (!clone) {
- sock_put(sk);
- return;
- }
-
- clone->sk = sk;
- phydev->drv->txtstamp(phydev, clone, type);
- }
- break;
- default:
- break;
+ phydev = skb->dev->phydev;
+ if (likely(phydev->drv->txtstamp)) {
+ clone = skb_clone_sk(skb);
+ if (!clone)
+ return;
+ phydev->drv->txtstamp(phydev, clone, type);
}
}
EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
-void skb_complete_tx_timestamp(struct sk_buff *skb,
- struct skb_shared_hwtstamps *hwtstamps)
-{
- struct sock *sk = skb->sk;
- struct sock_exterr_skb *serr;
- int err;
-
- if (!hwtstamps) {
- sock_put(sk);
- kfree_skb(skb);
- return;
- }
-
- *skb_hwtstamps(skb) = *hwtstamps;
-
- serr = SKB_EXT_ERR(skb);
- memset(serr, 0, sizeof(*serr));
- serr->ee.ee_errno = ENOMSG;
- serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
- skb->sk = NULL;
-
- err = sock_queue_err_skb(sk, skb);
-
- sock_put(sk);
- if (err)
- kfree_skb(skb);
-}
-EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
-
bool skb_defer_rx_timestamp(struct sk_buff *skb)
{
struct phy_device *phydev;
@@ -114,20 +68,12 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
__skb_pull(skb, ETH_HLEN);
- switch (type) {
- case PTP_CLASS_V1_IPV4:
- case PTP_CLASS_V1_IPV6:
- case PTP_CLASS_V2_IPV4:
- case PTP_CLASS_V2_IPV6:
- case PTP_CLASS_V2_L2:
- case PTP_CLASS_V2_VLAN:
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->rxtstamp))
- return phydev->drv->rxtstamp(phydev, skb, type);
- break;
- default:
- break;
- }
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ phydev = skb->dev->phydev;
+ if (likely(phydev->drv->rxtstamp))
+ return phydev->drv->rxtstamp(phydev, skb, type);
return false;
}
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
deleted file mode 100644
index 1b5fefdb8198..000000000000
--- a/net/core/user_dma.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
- * Portions based on net/core/datagram.c and copyrighted by their authors.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-
-/*
- * This code allows the net stack to make use of a DMA engine for
- * skb to iovec copies.
- */
-
-#include <linux/dmaengine.h>
-#include <linux/socket.h>
-#include <linux/export.h>
-#include <net/tcp.h>
-#include <net/netdma.h>
-
-#define NET_DMA_DEFAULT_COPYBREAK 4096
-
-int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
-EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
-
-/**
- * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
- * @skb - buffer to copy
- * @offset - offset in the buffer to start copying from
- * @iovec - io vector to copy to
- * @len - amount of data to copy from buffer to iovec
- * @pinned_list - locked iovec buffer data
- *
- * Note: the iovec is modified during the copy.
- */
-int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
- struct sk_buff *skb, int offset, struct iovec *to,
- size_t len, struct dma_pinned_list *pinned_list)
-{
- int start = skb_headlen(skb);
- int i, copy = start - offset;
- struct sk_buff *frag_iter;
- dma_cookie_t cookie = 0;
-
- /* Copy header. */
- if (copy > 0) {
- if (copy > len)
- copy = len;
- cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
- skb->data + offset, copy);
- if (cookie < 0)
- goto fault;
- len -= copy;
- if (len == 0)
- goto end;
- offset += copy;
- }
-
- /* Copy paged appendix. Hmm... why does this look so complicated? */
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int end;
- const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- WARN_ON(start > offset + len);
-
- end = start + skb_frag_size(frag);
- copy = end - offset;
- if (copy > 0) {
- struct page *page = skb_frag_page(frag);
-
- if (copy > len)
- copy = len;
-
- cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
- frag->page_offset + offset - start, copy);
- if (cookie < 0)
- goto fault;
- len -= copy;
- if (len == 0)
- goto end;
- offset += copy;
- }
- start = end;
- }
-
- skb_walk_frags(skb, frag_iter) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + frag_iter->len;
- copy = end - offset;
- if (copy > 0) {
- if (copy > len)
- copy = len;
- cookie = dma_skb_copy_datagram_iovec(chan, frag_iter,
- offset - start,
- to, copy,
- pinned_list);
- if (cookie < 0)
- goto fault;
- len -= copy;
- if (len == 0)
- goto end;
- offset += copy;
- }
- start = end;
- }
-
-end:
- if (!len) {
- skb->dma_cookie = cookie;
- return cookie;
- }
-
-fault:
- return -EFAULT;
-}
diff --git a/net/core/utils.c b/net/core/utils.c
index eed34338736c..efc76dd9dcd1 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -306,16 +306,14 @@ EXPORT_SYMBOL(in6_pton);
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to, int pseudohdr)
{
- __be32 diff[] = { ~from, to };
if (skb->ip_summed != CHECKSUM_PARTIAL) {
- *sum = csum_fold(csum_partial(diff, sizeof(diff),
- ~csum_unfold(*sum)));
+ *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from),
+ to));
if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_partial(diff, sizeof(diff),
- ~skb->csum);
+ skb->csum = ~csum_add(csum_sub(~(skb->csum), from), to);
} else if (pseudohdr)
- *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
- csum_unfold(*sum)));
+ *sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum), from),
+ to));
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index f8b98d89c285..ca11d283bbeb 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -471,7 +471,11 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh,
id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]);
if (netdev->dcbnl_ops->getapp) {
- up = netdev->dcbnl_ops->getapp(netdev, idtype, id);
+ ret = netdev->dcbnl_ops->getapp(netdev, idtype, id);
+ if (ret < 0)
+ return ret;
+ else
+ up = ret;
} else {
struct dcb_app app = {
.selector = idtype,
@@ -538,6 +542,8 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh,
if (netdev->dcbnl_ops->setapp) {
ret = netdev->dcbnl_ops->setapp(netdev, idtype, id, up);
+ if (ret < 0)
+ return ret;
} else {
struct dcb_app app;
app.selector = idtype;
@@ -1770,7 +1776,7 @@ EXPORT_SYMBOL(dcb_getapp);
*
* Priority 0 is an invalid priority in CEE spec. This routine
* removes applications from the app list if the priority is
- * set to zero.
+ * set to zero. Priority is expected to be 8-bit 802.1p user priority bitmap
*/
int dcb_setapp(struct net_device *dev, struct dcb_app *new)
{
@@ -1831,7 +1837,8 @@ EXPORT_SYMBOL(dcb_ieee_getapp_mask);
*
* This adds Application data to the list. Multiple application
* entries may exists for the same selector and protocol as long
- * as the priorities are different.
+ * as the priorities are different. Priority is expected to be a
+ * 3-bit unsigned integer
*/
int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
{
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 597557254ddb..83498975165f 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -99,7 +99,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
kmem_cache_destroy(slab);
}
-static int ccid_activate(struct ccid_operations *ccid_ops)
+static int __init ccid_activate(struct ccid_operations *ccid_ops)
{
int err = -ENOBUFS;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4db3c2a1679c..ad2acfe1ca61 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet6_reqsk_alloc(&dccp6_request_sock_ops);
+ req = inet_reqsk_alloc(&dccp6_request_sock_ops);
if (req == NULL)
goto drop;
@@ -404,7 +404,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (ipv6_opt_accepted(sk, skb) ||
+ if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index c69eb9c4fbb8..b50dc436db1f 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -55,11 +55,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
-
tw->tw_v6_daddr = sk->sk_v6_daddr;
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- tw->tw_ipv6only = np->ipv6only;
+ tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif
/* Linkage updates. */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index de2c1e719305..5ab6627cf370 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -848,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
default:
dccp_pr_debug("packet_type=%s\n",
dccp_packet_name(dh->dccph_type));
- sk_eat_skb(sk, skb, false);
+ sk_eat_skb(sk, skb);
}
verify_sock_status:
if (sock_flag(sk, SOCK_DONE)) {
@@ -905,7 +905,7 @@ verify_sock_status:
len = skb->len;
found_fin_ok:
if (!(flags & MSG_PEEK))
- sk_eat_skb(sk, skb, false);
+ sk_eat_skb(sk, skb);
break;
} while (1);
out:
@@ -1082,7 +1082,7 @@ void dccp_shutdown(struct sock *sk, int how)
EXPORT_SYMBOL_GPL(dccp_shutdown);
-static inline int dccp_mib_init(void)
+static inline int __init dccp_mib_init(void)
{
dccp_statistics = alloc_percpu(struct dccp_mib);
if (!dccp_statistics)
@@ -1115,7 +1115,7 @@ static int __init dccp_init(void)
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));
- rc = percpu_counter_init(&dccp_orphan_count, 0);
+ rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
if (rc)
goto out_fail;
rc = -ENOBUFS;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index ae011b46c071..25733d538147 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -127,6 +127,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/poll.h>
+#include <linux/jiffies.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
@@ -598,7 +599,7 @@ int dn_destroy_timer(struct sock *sk)
if (sk->sk_socket)
return 0;
- if ((jiffies - scp->stamp) >= (HZ * decnet_time_wait)) {
+ if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) {
dn_unhash_sock(sk);
sock_put(sk);
return 1;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 3b726f31c64c..4400da7739da 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -41,6 +41,7 @@
#include <linux/sysctl.h>
#include <linux/notifier.h>
#include <linux/slab.h>
+#include <linux/jiffies.h>
#include <asm/uaccess.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
@@ -875,7 +876,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa)
{
/* First check time since device went up */
- if ((jiffies - dn_db->uptime) < DRDELAY)
+ if (time_before(jiffies, dn_db->uptime + DRDELAY))
return 0;
/* If there is no router, then yes... */
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index d9c150cc59a9..1d330fd43dc7 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -23,6 +23,7 @@
#include <linux/spinlock.h>
#include <net/sock.h>
#include <linux/atomic.h>
+#include <linux/jiffies.h>
#include <net/flow.h>
#include <net/dn.h>
@@ -91,7 +92,7 @@ static void dn_slow_timer(unsigned long arg)
* since the last successful transmission.
*/
if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) {
- if ((jiffies - scp->stamp) >= scp->keepalive)
+ if (time_after_eq(jiffies, scp->stamp + scp->keepalive))
scp->keepalive_fxn(sk);
}
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index f5eede1d6cb8..a585fd6352eb 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -12,6 +12,9 @@ config NET_DSA
if NET_DSA
# tagging formats
+config NET_DSA_TAG_BRCM
+ bool
+
config NET_DSA_TAG_DSA
bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 7b9fcbbeda5d..da06ed1df620 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_NET_DSA) += dsa_core.o
dsa_core-y += dsa.o slave.o
# tagging formats
+dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 5db37cef50a9..22f34cf4cb27 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -10,7 +10,6 @@
*/
#include <linux/list.h>
-#include <linux/netdevice.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -44,7 +43,7 @@ void unregister_switch_driver(struct dsa_switch_driver *drv)
EXPORT_SYMBOL_GPL(unregister_switch_driver);
static struct dsa_switch_driver *
-dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name)
+dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name)
{
struct dsa_switch_driver *ret;
struct list_head *list;
@@ -59,7 +58,7 @@ dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name)
drv = list_entry(list, struct dsa_switch_driver, list);
- name = drv->probe(bus, sw_addr);
+ name = drv->probe(host_dev, sw_addr);
if (name != NULL) {
ret = drv;
break;
@@ -76,7 +75,7 @@ dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name)
/* basic switch operations **************************************************/
static struct dsa_switch *
dsa_switch_setup(struct dsa_switch_tree *dst, int index,
- struct device *parent, struct mii_bus *bus)
+ struct device *parent, struct device *host_dev)
{
struct dsa_chip_data *pd = dst->pd->chip + index;
struct dsa_switch_driver *drv;
@@ -89,7 +88,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
/*
* Probe for switch model.
*/
- drv = dsa_switch_probe(bus, pd->sw_addr, &name);
+ drv = dsa_switch_probe(host_dev, pd->sw_addr, &name);
if (drv == NULL) {
printk(KERN_ERR "%s[%d]: could not detect attached switch\n",
dst->master_netdev->name, index);
@@ -110,8 +109,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
ds->index = index;
ds->pd = dst->pd->chip + index;
ds->drv = drv;
- ds->master_mii_bus = bus;
-
+ ds->master_dev = host_dev;
/*
* Validate supplied switch configuration.
@@ -144,14 +142,44 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
goto out;
}
+ /* Make the built-in MII bus mask match the number of ports,
+ * switch drivers can override this later
+ */
+ ds->phys_mii_mask = ds->phys_port_mask;
+
/*
* If the CPU connects to this switch, set the switch tree
* tagging protocol to the preferred tagging format of this
* switch.
*/
- if (ds->dst->cpu_switch == index)
- ds->dst->tag_protocol = drv->tag_protocol;
+ if (dst->cpu_switch == index) {
+ switch (drv->tag_protocol) {
+#ifdef CONFIG_NET_DSA_TAG_DSA
+ case DSA_TAG_PROTO_DSA:
+ dst->rcv = dsa_netdev_ops.rcv;
+ break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+ case DSA_TAG_PROTO_EDSA:
+ dst->rcv = edsa_netdev_ops.rcv;
+ break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+ case DSA_TAG_PROTO_TRAILER:
+ dst->rcv = trailer_netdev_ops.rcv;
+ break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+ case DSA_TAG_PROTO_BRCM:
+ dst->rcv = brcm_netdev_ops.rcv;
+ break;
+#endif
+ default:
+ break;
+ }
+ dst->tag_protocol = drv->tag_protocol;
+ }
/*
* Do basic register setup.
@@ -210,6 +238,51 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
{
}
+#ifdef CONFIG_PM_SLEEP
+static int dsa_switch_suspend(struct dsa_switch *ds)
+{
+ int i, ret = 0;
+
+ /* Suspend slave network devices */
+ for (i = 0; i < DSA_MAX_PORTS; i++) {
+ if (!(ds->phys_port_mask & (1 << i)))
+ continue;
+
+ ret = dsa_slave_suspend(ds->ports[i]);
+ if (ret)
+ return ret;
+ }
+
+ if (ds->drv->suspend)
+ ret = ds->drv->suspend(ds);
+
+ return ret;
+}
+
+static int dsa_switch_resume(struct dsa_switch *ds)
+{
+ int i, ret = 0;
+
+ if (ds->drv->resume)
+ ret = ds->drv->resume(ds);
+
+ if (ret)
+ return ret;
+
+ /* Resume slave network devices */
+ for (i = 0; i < DSA_MAX_PORTS; i++) {
+ if (!(ds->phys_port_mask & (1 << i)))
+ continue;
+
+ ret = dsa_slave_resume(ds->ports[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
/* link polling *************************************************************/
static void dsa_link_poll_work(struct work_struct *ugly)
@@ -256,7 +329,7 @@ static struct device *dev_find_class(struct device *parent, char *class)
return device_find_child(parent, class, dev_is_class);
}
-static struct mii_bus *dev_to_mii_bus(struct device *dev)
+struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
{
struct device *d;
@@ -272,6 +345,7 @@ static struct mii_bus *dev_to_mii_bus(struct device *dev)
return NULL;
}
+EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus);
static struct net_device *dev_to_net_device(struct device *dev)
{
@@ -351,8 +425,7 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
for (i = 0; i < pd->nr_chips; i++) {
port_index = 0;
while (port_index < DSA_MAX_PORTS) {
- if (pd->chip[i].port_names[port_index])
- kfree(pd->chip[i].port_names[port_index]);
+ kfree(pd->chip[i].port_names[port_index]);
port_index++;
}
kfree(pd->chip[i].rtable);
@@ -411,7 +484,8 @@ static int dsa_of_probe(struct platform_device *pdev)
chip_index++;
cd = &pd->chip[chip_index];
- cd->mii_bus = &mdio_bus->dev;
+ cd->of_node = child;
+ cd->host_dev = &mdio_bus->dev;
sw_addr = of_get_property(child, "reg", NULL);
if (!sw_addr)
@@ -432,6 +506,8 @@ static int dsa_of_probe(struct platform_device *pdev)
if (!port_name)
continue;
+ cd->port_dn[port_index] = port;
+
cd->port_names[port_index] = kstrdup(port_name,
GFP_KERNEL);
if (!cd->port_names[port_index]) {
@@ -535,17 +611,9 @@ static int dsa_probe(struct platform_device *pdev)
dst->cpu_port = -1;
for (i = 0; i < pd->nr_chips; i++) {
- struct mii_bus *bus;
struct dsa_switch *ds;
- bus = dev_to_mii_bus(pd->chip[i].mii_bus);
- if (bus == NULL) {
- printk(KERN_ERR "%s[%d]: no mii bus found for "
- "dsa switch\n", dev->name, i);
- continue;
- }
-
- ds = dsa_switch_setup(dst, i, &pdev->dev, bus);
+ ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev);
if (IS_ERR(ds)) {
printk(KERN_ERR "%s[%d]: couldn't create dsa switch "
"instance (error %ld)\n", dev->name, i,
@@ -609,7 +677,62 @@ static void dsa_shutdown(struct platform_device *pdev)
{
}
+static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+
+ if (unlikely(dst == NULL)) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return dst->rcv(skb, dev, pt, orig_dev);
+}
+
+static struct packet_type dsa_pack_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_XDSA),
+ .func = dsa_switch_rcv,
+};
+
+#ifdef CONFIG_PM_SLEEP
+static int dsa_suspend(struct device *d)
+{
+ struct platform_device *pdev = to_platform_device(d);
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+ int i, ret = 0;
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds != NULL)
+ ret = dsa_switch_suspend(ds);
+ }
+
+ return ret;
+}
+
+static int dsa_resume(struct device *d)
+{
+ struct platform_device *pdev = to_platform_device(d);
+ struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
+ int i, ret = 0;
+
+ for (i = 0; i < dst->pd->nr_chips; i++) {
+ struct dsa_switch *ds = dst->ds[i];
+
+ if (ds != NULL)
+ ret = dsa_switch_resume(ds);
+ }
+
+ return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
+
static const struct of_device_id dsa_of_match_table[] = {
+ { .compatible = "brcm,bcm7445-switch-v4.0" },
{ .compatible = "marvell,dsa", },
{}
};
@@ -623,6 +746,7 @@ static struct platform_driver dsa_driver = {
.name = "dsa",
.owner = THIS_MODULE,
.of_match_table = dsa_of_match_table,
+ .pm = &dsa_pm_ops,
},
};
@@ -634,30 +758,15 @@ static int __init dsa_init_module(void)
if (rc)
return rc;
-#ifdef CONFIG_NET_DSA_TAG_DSA
- dev_add_pack(&dsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
- dev_add_pack(&edsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- dev_add_pack(&trailer_packet_type);
-#endif
+ dev_add_pack(&dsa_pack_type);
+
return 0;
}
module_init(dsa_init_module);
static void __exit dsa_cleanup_module(void)
{
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- dev_remove_pack(&trailer_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
- dev_remove_pack(&edsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_DSA
- dev_remove_pack(&dsa_packet_type);
-#endif
+ dev_remove_pack(&dsa_pack_type);
platform_driver_unregister(&dsa_driver);
}
module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index d4cf5cc747e3..dc9756d3154c 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -12,7 +12,13 @@
#define __DSA_PRIV_H
#include <linux/phy.h>
-#include <net/dsa.h>
+#include <linux/netdevice.h>
+
+struct dsa_device_ops {
+ netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
+ int (*rcv)(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
+};
struct dsa_slave_priv {
/*
@@ -20,6 +26,8 @@ struct dsa_slave_priv {
* switch port.
*/
struct net_device *dev;
+ netdev_tx_t (*xmit)(struct sk_buff *skb,
+ struct net_device *dev);
/*
* Which switch this port is a part of, and the port index
@@ -33,28 +41,35 @@ struct dsa_slave_priv {
* to this port.
*/
struct phy_device *phy;
+ phy_interface_t phy_interface;
+ int old_link;
+ int old_pause;
+ int old_duplex;
};
/* dsa.c */
extern char dsa_driver_version[];
/* slave.c */
+extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
struct net_device *dsa_slave_create(struct dsa_switch *ds,
struct device *parent,
int port, char *name);
+int dsa_slave_suspend(struct net_device *slave_dev);
+int dsa_slave_resume(struct net_device *slave_dev);
/* tag_dsa.c */
-netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type dsa_packet_type;
+extern const struct dsa_device_ops dsa_netdev_ops;
/* tag_edsa.c */
-netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type edsa_packet_type;
+extern const struct dsa_device_ops edsa_netdev_ops;
/* tag_trailer.c */
-netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type trailer_packet_type;
+extern const struct dsa_device_ops trailer_netdev_ops;
+
+/* tag_brcm.c */
+extern const struct dsa_device_ops brcm_netdev_ops;
#endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 64c5af0a10dd..8030489d9cbe 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -9,9 +9,10 @@
*/
#include <linux/list.h>
-#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/phy.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
#include "dsa_priv.h"
/* slave mii_bus handling ***************************************************/
@@ -19,7 +20,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
struct dsa_switch *ds = bus->priv;
- if (ds->phys_port_mask & (1 << addr))
+ if (ds->phys_mii_mask & (1 << addr))
return ds->drv->phy_read(ds, addr, reg);
return 0xffff;
@@ -29,7 +30,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
{
struct dsa_switch *ds = bus->priv;
- if (ds->phys_port_mask & (1 << addr))
+ if (ds->phys_mii_mask & (1 << addr))
return ds->drv->phy_write(ds, addr, reg, val);
return 0;
@@ -43,7 +44,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
ds->slave_mii_bus->write = dsa_slave_phy_write;
snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x",
ds->index, ds->pd->sw_addr);
- ds->slave_mii_bus->parent = &ds->master_mii_bus->dev;
+ ds->slave_mii_bus->parent = ds->master_dev;
}
@@ -61,6 +62,7 @@ static int dsa_slave_open(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct net_device *master = p->parent->dst->master_netdev;
+ struct dsa_switch *ds = p->parent;
int err;
if (!(master->flags & IFF_UP))
@@ -83,8 +85,20 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
+ if (ds->drv->port_enable) {
+ err = ds->drv->port_enable(ds, p->port, p->phy);
+ if (err)
+ goto clear_promisc;
+ }
+
+ if (p->phy)
+ phy_start(p->phy);
+
return 0;
+clear_promisc:
+ if (dev->flags & IFF_PROMISC)
+ dev_set_promiscuity(master, 0);
clear_allmulti:
if (dev->flags & IFF_ALLMULTI)
dev_set_allmulti(master, -1);
@@ -99,6 +113,10 @@ static int dsa_slave_close(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct net_device *master = p->parent->dst->master_netdev;
+ struct dsa_switch *ds = p->parent;
+
+ if (p->phy)
+ phy_stop(p->phy);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
@@ -110,6 +128,9 @@ static int dsa_slave_close(struct net_device *dev)
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
+ if (ds->drv->port_disable)
+ ds->drv->port_disable(ds, p->port, p->phy);
+
return 0;
}
@@ -171,6 +192,24 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EOPNOTSUPP;
}
+static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
+ return p->xmit(skb, dev);
+}
+
+static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
+ skb->dev = p->parent->dst->master_netdev;
+ dev_queue_xmit(skb);
+
+ return NETDEV_TX_OK;
+}
+
/* ethtool operations *******************************************************/
static int
@@ -282,6 +321,65 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
return -EOPNOTSUPP;
}
+static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->get_wol)
+ ds->drv->get_wol(ds, p->port, w);
+}
+
+static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+ if (ds->drv->set_wol)
+ ret = ds->drv->set_wol(ds, p->port, w);
+
+ return ret;
+}
+
+static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret;
+
+ if (!ds->drv->set_eee)
+ return -EOPNOTSUPP;
+
+ ret = ds->drv->set_eee(ds, p->port, p->phy, e);
+ if (ret)
+ return ret;
+
+ if (p->phy)
+ ret = phy_ethtool_set_eee(p->phy, e);
+
+ return ret;
+}
+
+static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret;
+
+ if (!ds->drv->get_eee)
+ return -EOPNOTSUPP;
+
+ ret = ds->drv->get_eee(ds, p->port, e);
+ if (ret)
+ return ret;
+
+ if (p->phy)
+ ret = phy_ethtool_get_eee(p->phy, e);
+
+ return ret;
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_settings = dsa_slave_get_settings,
.set_settings = dsa_slave_set_settings,
@@ -291,46 +389,143 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_strings = dsa_slave_get_strings,
.get_ethtool_stats = dsa_slave_get_ethtool_stats,
.get_sset_count = dsa_slave_get_sset_count,
+ .set_wol = dsa_slave_set_wol,
+ .get_wol = dsa_slave_get_wol,
+ .set_eee = dsa_slave_set_eee,
+ .get_eee = dsa_slave_get_eee,
};
-#ifdef CONFIG_NET_DSA_TAG_DSA
-static const struct net_device_ops dsa_netdev_ops = {
+static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_init = dsa_slave_init,
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
- .ndo_start_xmit = dsa_xmit,
+ .ndo_start_xmit = dsa_slave_xmit,
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
.ndo_do_ioctl = dsa_slave_ioctl,
};
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-static const struct net_device_ops edsa_netdev_ops = {
- .ndo_init = dsa_slave_init,
- .ndo_open = dsa_slave_open,
- .ndo_stop = dsa_slave_close,
- .ndo_start_xmit = edsa_xmit,
- .ndo_change_rx_flags = dsa_slave_change_rx_flags,
- .ndo_set_rx_mode = dsa_slave_set_rx_mode,
- .ndo_set_mac_address = dsa_slave_set_mac_address,
- .ndo_do_ioctl = dsa_slave_ioctl,
-};
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-static const struct net_device_ops trailer_netdev_ops = {
- .ndo_init = dsa_slave_init,
- .ndo_open = dsa_slave_open,
- .ndo_stop = dsa_slave_close,
- .ndo_start_xmit = trailer_xmit,
- .ndo_change_rx_flags = dsa_slave_change_rx_flags,
- .ndo_set_rx_mode = dsa_slave_set_rx_mode,
- .ndo_set_mac_address = dsa_slave_set_mac_address,
- .ndo_do_ioctl = dsa_slave_ioctl,
-};
-#endif
+
+static void dsa_slave_adjust_link(struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ unsigned int status_changed = 0;
+
+ if (p->old_link != p->phy->link) {
+ status_changed = 1;
+ p->old_link = p->phy->link;
+ }
+
+ if (p->old_duplex != p->phy->duplex) {
+ status_changed = 1;
+ p->old_duplex = p->phy->duplex;
+ }
+
+ if (p->old_pause != p->phy->pause) {
+ status_changed = 1;
+ p->old_pause = p->phy->pause;
+ }
+
+ if (ds->drv->adjust_link && status_changed)
+ ds->drv->adjust_link(ds, p->port, p->phy);
+
+ if (status_changed)
+ phy_print_status(p->phy);
+}
+
+static int dsa_slave_fixed_link_update(struct net_device *dev,
+ struct fixed_phy_status *status)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->fixed_link_update)
+ ds->drv->fixed_link_update(ds, p->port, status);
+
+ return 0;
+}
/* slave device setup *******************************************************/
+static void dsa_slave_phy_setup(struct dsa_slave_priv *p,
+ struct net_device *slave_dev)
+{
+ struct dsa_switch *ds = p->parent;
+ struct dsa_chip_data *cd = ds->pd;
+ struct device_node *phy_dn, *port_dn;
+ bool phy_is_fixed = false;
+ u32 phy_flags = 0;
+ int ret;
+
+ port_dn = cd->port_dn[p->port];
+ p->phy_interface = of_get_phy_mode(port_dn);
+
+ phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
+ if (of_phy_is_fixed_link(port_dn)) {
+ /* In the case of a fixed PHY, the DT node associated
+ * to the fixed PHY is the Port DT node
+ */
+ ret = of_phy_register_fixed_link(port_dn);
+ if (ret) {
+ pr_err("failed to register fixed PHY\n");
+ return;
+ }
+ phy_is_fixed = true;
+ phy_dn = port_dn;
+ }
+
+ if (ds->drv->get_phy_flags)
+ phy_flags = ds->drv->get_phy_flags(ds, p->port);
+
+ if (phy_dn)
+ p->phy = of_phy_connect(slave_dev, phy_dn,
+ dsa_slave_adjust_link, phy_flags,
+ p->phy_interface);
+
+ if (p->phy && phy_is_fixed)
+ fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update);
+
+ /* We could not connect to a designated PHY, so use the switch internal
+ * MDIO bus instead
+ */
+ if (!p->phy)
+ p->phy = ds->slave_mii_bus->phy_map[p->port];
+ else
+ pr_info("attached PHY at address %d [%s]\n",
+ p->phy->addr, p->phy->drv->name);
+}
+
+int dsa_slave_suspend(struct net_device *slave_dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
+
+ netif_device_detach(slave_dev);
+
+ if (p->phy) {
+ phy_stop(p->phy);
+ p->old_pause = -1;
+ p->old_link = -1;
+ p->old_duplex = -1;
+ phy_suspend(p->phy);
+ }
+
+ return 0;
+}
+
+int dsa_slave_resume(struct net_device *slave_dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
+
+ netif_device_attach(slave_dev);
+
+ if (p->phy) {
+ phy_resume(p->phy);
+ phy_start(p->phy);
+ }
+
+ return 0;
+}
+
struct net_device *
dsa_slave_create(struct dsa_switch *ds, struct device *parent,
int port, char *name)
@@ -340,8 +535,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
struct dsa_slave_priv *p;
int ret;
- slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv),
- name, ether_setup);
+ slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
+ NET_NAME_UNKNOWN, ether_setup);
if (slave_dev == NULL)
return slave_dev;
@@ -349,35 +544,48 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
eth_hw_addr_inherit(slave_dev, master);
slave_dev->tx_queue_len = 0;
+ slave_dev->netdev_ops = &dsa_slave_netdev_ops;
+
+ SET_NETDEV_DEV(slave_dev, parent);
+ slave_dev->dev.of_node = ds->pd->port_dn[port];
+ slave_dev->vlan_features = master->vlan_features;
+
+ p = netdev_priv(slave_dev);
+ p->dev = slave_dev;
+ p->parent = ds;
+ p->port = port;
switch (ds->dst->tag_protocol) {
#ifdef CONFIG_NET_DSA_TAG_DSA
- case htons(ETH_P_DSA):
- slave_dev->netdev_ops = &dsa_netdev_ops;
+ case DSA_TAG_PROTO_DSA:
+ p->xmit = dsa_netdev_ops.xmit;
break;
#endif
#ifdef CONFIG_NET_DSA_TAG_EDSA
- case htons(ETH_P_EDSA):
- slave_dev->netdev_ops = &edsa_netdev_ops;
+ case DSA_TAG_PROTO_EDSA:
+ p->xmit = edsa_netdev_ops.xmit;
break;
#endif
#ifdef CONFIG_NET_DSA_TAG_TRAILER
- case htons(ETH_P_TRAILER):
- slave_dev->netdev_ops = &trailer_netdev_ops;
+ case DSA_TAG_PROTO_TRAILER:
+ p->xmit = trailer_netdev_ops.xmit;
+ break;
+#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+ case DSA_TAG_PROTO_BRCM:
+ p->xmit = brcm_netdev_ops.xmit;
break;
#endif
default:
- BUG();
+ p->xmit = dsa_slave_notag_xmit;
+ break;
}
- SET_NETDEV_DEV(slave_dev, parent);
- slave_dev->vlan_features = master->vlan_features;
+ p->old_pause = -1;
+ p->old_link = -1;
+ p->old_duplex = -1;
- p = netdev_priv(slave_dev);
- p->dev = slave_dev;
- p->parent = ds;
- p->port = port;
- p->phy = ds->slave_mii_bus->phy_map[port];
+ dsa_slave_phy_setup(p, slave_dev);
ret = register_netdev(slave_dev);
if (ret) {
@@ -390,6 +598,9 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
netif_carrier_off(slave_dev);
if (p->phy != NULL) {
+ if (ds->drv->get_phy_flags(ds, port))
+ p->phy->dev_flags |= ds->drv->get_phy_flags(ds, port);
+
phy_attach(slave_dev, dev_name(&p->phy->dev),
PHY_INTERFACE_MODE_GMII);
@@ -397,7 +608,6 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
p->phy->speed = 0;
p->phy->duplex = 0;
p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg;
- phy_start_aneg(p->phy);
}
return slave_dev;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
new file mode 100644
index 000000000000..83d3572cdb20
--- /dev/null
+++ b/net/dsa/tag_brcm.c
@@ -0,0 +1,171 @@
+/*
+ * Broadcom tag support
+ *
+ * Copyright (C) 2014 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "dsa_priv.h"
+
+/* This tag length is 4 bytes, older ones were 6 bytes, we do not
+ * handle them
+ */
+#define BRCM_TAG_LEN 4
+
+/* Tag is constructed and desconstructed using byte by byte access
+ * because the tag is placed after the MAC Source Address, which does
+ * not make it 4-bytes aligned, so this might cause unaligned accesses
+ * on most systems where this is used.
+ */
+
+/* Ingress and egress opcodes */
+#define BRCM_OPCODE_SHIFT 5
+#define BRCM_OPCODE_MASK 0x7
+
+/* Ingress fields */
+/* 1st byte in the tag */
+#define BRCM_IG_TC_SHIFT 2
+#define BRCM_IG_TC_MASK 0x7
+/* 2nd byte in the tag */
+#define BRCM_IG_TE_MASK 0x3
+#define BRCM_IG_TS_SHIFT 7
+/* 3rd byte in the tag */
+#define BRCM_IG_DSTMAP2_MASK 1
+#define BRCM_IG_DSTMAP1_MASK 0xff
+
+/* Egress fields */
+
+/* 2nd byte in the tag */
+#define BRCM_EG_CID_MASK 0xff
+
+/* 3rd byte in the tag */
+#define BRCM_EG_RC_MASK 0xff
+#define BRCM_EG_RC_RSVD (3 << 6)
+#define BRCM_EG_RC_EXCEPTION (1 << 5)
+#define BRCM_EG_RC_PROT_SNOOP (1 << 4)
+#define BRCM_EG_RC_PROT_TERM (1 << 3)
+#define BRCM_EG_RC_SWITCH (1 << 2)
+#define BRCM_EG_RC_MAC_LEARN (1 << 1)
+#define BRCM_EG_RC_MIRROR (1 << 0)
+#define BRCM_EG_TC_SHIFT 5
+#define BRCM_EG_TC_MASK 0x7
+#define BRCM_EG_PID_MASK 0x1f
+
+static netdev_tx_t brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ u8 *brcm_tag;
+
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+
+ if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
+ goto out_free;
+
+ skb_push(skb, BRCM_TAG_LEN);
+
+ memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN);
+
+ /* Build the tag after the MAC Source Address */
+ brcm_tag = skb->data + 2 * ETH_ALEN;
+
+ /* Set the ingress opcode, traffic class, tag enforcment is
+ * deprecated
+ */
+ brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) |
+ ((skb->priority << BRCM_IG_TC_SHIFT) & BRCM_IG_TC_MASK);
+ brcm_tag[1] = 0;
+ brcm_tag[2] = 0;
+ if (p->port == 8)
+ brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
+ brcm_tag[3] = (1 << p->port) & BRCM_IG_DSTMAP1_MASK;
+
+ /* Queue the SKB for transmission on the parent interface, but
+ * do not modify its EtherType
+ */
+ skb->dev = p->parent->dst->master_netdev;
+ dev_queue_xmit(skb);
+
+ return NETDEV_TX_OK;
+
+out_free:
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds;
+ int source_port;
+ u8 *brcm_tag;
+
+ if (unlikely(dst == NULL))
+ goto out_drop;
+
+ ds = dst->ds[0];
+
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
+ goto out_drop;
+
+ /* skb->data points to the EtherType, the tag is right before it */
+ brcm_tag = skb->data - 2;
+
+ /* The opcode should never be different than 0b000 */
+ if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
+ goto out_drop;
+
+ /* We should never see a reserved reason code without knowing how to
+ * handle it
+ */
+ WARN_ON(brcm_tag[2] & BRCM_EG_RC_RSVD);
+
+ /* Locate which port this is coming from */
+ source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
+
+ /* Validate port against switch setup, either the port is totally */
+ if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+ goto out_drop;
+
+ /* Remove Broadcom tag and update checksum */
+ skb_pull_rcsum(skb, BRCM_TAG_LEN);
+
+ /* Move the Ethernet DA and SA */
+ memmove(skb->data - ETH_HLEN,
+ skb->data - ETH_HLEN - BRCM_TAG_LEN,
+ 2 * ETH_ALEN);
+
+ skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = ds->ports[source_port];
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ skb->dev->stats.rx_packets++;
+ skb->dev->stats.rx_bytes += skb->len;
+
+ netif_receive_skb(skb);
+
+ return 0;
+
+out_drop:
+ kfree_skb(skb);
+out:
+ return 0;
+}
+
+const struct dsa_device_ops brcm_netdev_ops = {
+ .xmit = brcm_tag_xmit,
+ .rcv = brcm_tag_rcv,
+};
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index cacce1e22f9c..ce90c8bdc658 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -10,13 +10,12 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
-#include <linux/netdevice.h>
#include <linux/slab.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
-netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
u8 *dsa_header;
@@ -186,7 +185,7 @@ out:
return 0;
}
-struct packet_type dsa_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_DSA),
- .func = dsa_rcv,
+const struct dsa_device_ops dsa_netdev_ops = {
+ .xmit = dsa_xmit,
+ .rcv = dsa_rcv,
};
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index e70c43c25e64..94fcce778679 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -10,14 +10,13 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
-#include <linux/netdevice.h>
#include <linux/slab.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
#define EDSA_HLEN 8
-netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
u8 *edsa_header;
@@ -205,7 +204,7 @@ out:
return 0;
}
-struct packet_type edsa_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_EDSA),
- .func = edsa_rcv,
+const struct dsa_device_ops edsa_netdev_ops = {
+ .xmit = edsa_xmit,
+ .rcv = edsa_rcv,
};
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 94bc260d015d..115fdca34077 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -10,11 +10,10 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
-#include <linux/netdevice.h>
#include <linux/slab.h>
#include "dsa_priv.h"
-netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct sk_buff *nskb;
@@ -114,7 +113,7 @@ out:
return 0;
}
-struct packet_type trailer_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_TRAILER),
- .func = trailer_rcv,
+const struct dsa_device_ops trailer_netdev_ops = {
+ .xmit = trailer_xmit,
+ .rcv = trailer_rcv,
};
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 5dc638cad2e1..33a140e15834 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -146,6 +146,33 @@ int eth_rebuild_header(struct sk_buff *skb)
EXPORT_SYMBOL(eth_rebuild_header);
/**
+ * eth_get_headlen - determine the the length of header for an ethernet frame
+ * @data: pointer to start of frame
+ * @len: total length of frame
+ *
+ * Make a best effort attempt to pull the length for all of the headers for
+ * a given frame in a linear buffer.
+ */
+u32 eth_get_headlen(void *data, unsigned int len)
+{
+ const struct ethhdr *eth = (const struct ethhdr *)data;
+ struct flow_keys keys;
+
+ /* this should never happen, but better safe than sorry */
+ if (len < sizeof(*eth))
+ return len;
+
+ /* parse any remaining L2/L3 headers, check for L4 */
+ if (!__skb_flow_dissect(NULL, &keys, data,
+ eth->h_proto, sizeof(*eth), len))
+ return max_t(u32, keys.thoff, sizeof(*eth));
+
+ /* parse for any L4 headers */
+ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
+}
+EXPORT_SYMBOL(eth_get_headlen);
+
+/**
* eth_type_trans - determine the packet's protocol ID.
* @skb: received socket data
* @dev: receiving network device
@@ -181,11 +208,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
* variants has been configured on the receiving interface,
* and if so, set skb->protocol without looking at the packet.
*/
- if (unlikely(netdev_uses_dsa_tags(dev)))
- return htons(ETH_P_DSA);
-
- if (unlikely(netdev_uses_trailer_tags(dev)))
- return htons(ETH_P_TRAILER);
+ if (unlikely(netdev_uses_dsa(dev)))
+ return htons(ETH_P_XDSA);
if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN))
return eth->h_proto;
@@ -390,7 +414,8 @@ EXPORT_SYMBOL(ether_setup);
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
unsigned int rxqs)
{
- return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
+ return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
+ ether_setup, txqs, rxqs);
}
EXPORT_SYMBOL(alloc_etherdev_mqs);
diff --git a/net/hsr/Makefile b/net/hsr/Makefile
index b68359f181cc..9ae972a820f4 100644
--- a/net/hsr/Makefile
+++ b/net/hsr/Makefile
@@ -4,4 +4,5 @@
obj-$(CONFIG_HSR) += hsr.o
-hsr-y := hsr_main.o hsr_framereg.o hsr_device.o hsr_netlink.o
+hsr-y := hsr_main.o hsr_framereg.o hsr_device.o \
+ hsr_netlink.o hsr_slave.o hsr_forward.o
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e5302b7f7ca9..a138d75751df 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,7 +6,7 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*
* This file contains device methods for creating, using and destroying
* virtual HSR devices.
@@ -15,12 +15,13 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
#include "hsr_device.h"
+#include "hsr_slave.h"
#include "hsr_framereg.h"
#include "hsr_main.h"
+#include "hsr_forward.h"
static bool is_admin_up(struct net_device *dev)
@@ -45,75 +46,108 @@ static void __hsr_set_operstate(struct net_device *dev, int transition)
}
}
-void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1,
- struct net_device *slave2)
+static void hsr_set_operstate(struct hsr_port *master, bool has_carrier)
{
- if (!is_admin_up(hsr_dev)) {
- __hsr_set_operstate(hsr_dev, IF_OPER_DOWN);
+ if (!is_admin_up(master->dev)) {
+ __hsr_set_operstate(master->dev, IF_OPER_DOWN);
return;
}
- if (is_slave_up(slave1) || is_slave_up(slave2))
- __hsr_set_operstate(hsr_dev, IF_OPER_UP);
+ if (has_carrier)
+ __hsr_set_operstate(master->dev, IF_OPER_UP);
else
- __hsr_set_operstate(hsr_dev, IF_OPER_LOWERLAYERDOWN);
+ __hsr_set_operstate(master->dev, IF_OPER_LOWERLAYERDOWN);
}
-void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1,
- struct net_device *slave2)
+static bool hsr_check_carrier(struct hsr_port *master)
{
- if (is_slave_up(slave1) || is_slave_up(slave2))
- netif_carrier_on(hsr_dev);
+ struct hsr_port *port;
+ bool has_carrier;
+
+ has_carrier = false;
+
+ rcu_read_lock();
+ hsr_for_each_port(master->hsr, port)
+ if ((port->type != HSR_PT_MASTER) && is_slave_up(port->dev)) {
+ has_carrier = true;
+ break;
+ }
+ rcu_read_unlock();
+
+ if (has_carrier)
+ netif_carrier_on(master->dev);
else
- netif_carrier_off(hsr_dev);
+ netif_carrier_off(master->dev);
+
+ return has_carrier;
}
-void hsr_check_announce(struct net_device *hsr_dev, int old_operstate)
+static void hsr_check_announce(struct net_device *hsr_dev,
+ unsigned char old_operstate)
{
- struct hsr_priv *hsr_priv;
+ struct hsr_priv *hsr;
- hsr_priv = netdev_priv(hsr_dev);
+ hsr = netdev_priv(hsr_dev);
if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) {
/* Went up */
- hsr_priv->announce_count = 0;
- hsr_priv->announce_timer.expires = jiffies +
+ hsr->announce_count = 0;
+ hsr->announce_timer.expires = jiffies +
msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
- add_timer(&hsr_priv->announce_timer);
+ add_timer(&hsr->announce_timer);
}
if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
/* Went down */
- del_timer(&hsr_priv->announce_timer);
+ del_timer(&hsr->announce_timer);
}
-
-int hsr_get_max_mtu(struct hsr_priv *hsr_priv)
+void hsr_check_carrier_and_operstate(struct hsr_priv *hsr)
{
- int mtu_max;
-
- if (hsr_priv->slave[0] && hsr_priv->slave[1])
- mtu_max = min(hsr_priv->slave[0]->mtu, hsr_priv->slave[1]->mtu);
- else if (hsr_priv->slave[0])
- mtu_max = hsr_priv->slave[0]->mtu;
- else if (hsr_priv->slave[1])
- mtu_max = hsr_priv->slave[1]->mtu;
- else
- mtu_max = HSR_TAGLEN;
+ struct hsr_port *master;
+ unsigned char old_operstate;
+ bool has_carrier;
- return mtu_max - HSR_TAGLEN;
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+ /* netif_stacked_transfer_operstate() cannot be used here since
+ * it doesn't set IF_OPER_LOWERLAYERDOWN (?)
+ */
+ old_operstate = master->dev->operstate;
+ has_carrier = hsr_check_carrier(master);
+ hsr_set_operstate(master, has_carrier);
+ hsr_check_announce(master->dev, old_operstate);
}
+int hsr_get_max_mtu(struct hsr_priv *hsr)
+{
+ unsigned int mtu_max;
+ struct hsr_port *port;
+
+ mtu_max = ETH_DATA_LEN;
+ rcu_read_lock();
+ hsr_for_each_port(hsr, port)
+ if (port->type != HSR_PT_MASTER)
+ mtu_max = min(port->dev->mtu, mtu_max);
+ rcu_read_unlock();
+
+ if (mtu_max < HSR_HLEN)
+ return 0;
+ return mtu_max - HSR_HLEN;
+}
+
+
static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
{
- struct hsr_priv *hsr_priv;
+ struct hsr_priv *hsr;
+ struct hsr_port *master;
- hsr_priv = netdev_priv(dev);
+ hsr = netdev_priv(dev);
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
- if (new_mtu > hsr_get_max_mtu(hsr_priv)) {
- netdev_info(hsr_priv->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n",
- HSR_TAGLEN);
+ if (new_mtu > hsr_get_max_mtu(hsr)) {
+ netdev_info(master->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n",
+ HSR_HLEN);
return -EINVAL;
}
@@ -124,164 +158,95 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
static int hsr_dev_open(struct net_device *dev)
{
- struct hsr_priv *hsr_priv;
- int i;
- char *slave_name;
+ struct hsr_priv *hsr;
+ struct hsr_port *port;
+ char designation;
- hsr_priv = netdev_priv(dev);
+ hsr = netdev_priv(dev);
+ designation = '\0';
- for (i = 0; i < HSR_MAX_SLAVE; i++) {
- if (hsr_priv->slave[i])
- slave_name = hsr_priv->slave[i]->name;
- else
- slave_name = "null";
-
- if (!is_slave_up(hsr_priv->slave[i]))
- netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a working HSR network\n",
- 'A' + i, slave_name);
+ rcu_read_lock();
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ designation = 'A';
+ break;
+ case HSR_PT_SLAVE_B:
+ designation = 'B';
+ break;
+ default:
+ designation = '?';
+ }
+ if (!is_slave_up(port->dev))
+ netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n",
+ designation, port->dev->name);
}
+ rcu_read_unlock();
+
+ if (designation == '\0')
+ netdev_warn(dev, "No slave devices configured\n");
return 0;
}
+
static int hsr_dev_close(struct net_device *dev)
{
- /* Nothing to do here. We could try to restore the state of the slaves
- * to what they were before being changed by the hsr master dev's state,
- * but they might have been changed manually in the mean time too, so
- * taking them up or down here might be confusing and is probably not a
- * good idea.
- */
+ /* Nothing to do here. */
return 0;
}
-static void hsr_fill_tag(struct hsr_ethhdr *hsr_ethhdr, struct hsr_priv *hsr_priv)
+static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr,
+ netdev_features_t features)
{
- unsigned long irqflags;
+ netdev_features_t mask;
+ struct hsr_port *port;
- /* IEC 62439-1:2010, p 48, says the 4-bit "path" field can take values
- * between 0001-1001 ("ring identifier", for regular HSR frames),
- * or 1111 ("HSR management", supervision frames). Unfortunately, the
- * spec writers forgot to explain what a "ring identifier" is, or
- * how it is used. So we just set this to 0001 for regular frames,
- * and 1111 for supervision frames.
- */
- set_hsr_tag_path(&hsr_ethhdr->hsr_tag, 0x1);
+ mask = features;
- /* IEC 62439-1:2010, p 12: "The link service data unit in an Ethernet
- * frame is the content of the frame located between the Length/Type
- * field and the Frame Check Sequence."
+ /* Mask out all features that, if supported by one device, should be
+ * enabled for all devices (see NETIF_F_ONE_FOR_ALL).
*
- * IEC 62439-3, p 48, specifies the "original LPDU" to include the
- * original "LT" field (what "LT" means is not explained anywhere as
- * far as I can see - perhaps "Length/Type"?). So LSDU_size might
- * equal original length + 2.
- * Also, the fact that this field is not used anywhere (might be used
- * by a RedBox connecting HSR and PRP nets?) means I cannot test its
- * correctness. Instead of guessing, I set this to 0 here, to make any
- * problems immediately apparent. Anyone using this driver with PRP/HSR
- * RedBoxes might need to fix this...
+ * Anything that's off in mask will not be enabled - so only things
+ * that were in features originally, and also is in NETIF_F_ONE_FOR_ALL,
+ * may become enabled.
*/
- set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, 0);
-
- spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags);
- hsr_ethhdr->hsr_tag.sequence_nr = htons(hsr_priv->sequence_nr);
- hsr_priv->sequence_nr++;
- spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags);
+ features &= ~NETIF_F_ONE_FOR_ALL;
+ hsr_for_each_port(hsr, port)
+ features = netdev_increment_features(features,
+ port->dev->features,
+ mask);
- hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
-
- hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP);
+ return features;
}
-static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv,
- enum hsr_dev_idx dev_idx)
+static netdev_features_t hsr_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
- struct hsr_ethhdr *hsr_ethhdr;
-
- hsr_ethhdr = (struct hsr_ethhdr *) skb->data;
+ struct hsr_priv *hsr = netdev_priv(dev);
- skb->dev = hsr_priv->slave[dev_idx];
-
- hsr_addr_subst_dest(hsr_priv, &hsr_ethhdr->ethhdr, dev_idx);
-
- /* Address substitution (IEC62439-3 pp 26, 50): replace mac
- * address of outgoing frame with that of the outgoing slave's.
- */
- ether_addr_copy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr);
-
- return dev_queue_xmit(skb);
+ return hsr_features_recompute(hsr, features);
}
static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct hsr_priv *hsr_priv;
- struct hsr_ethhdr *hsr_ethhdr;
- struct sk_buff *skb2;
- int res1, res2;
-
- hsr_priv = netdev_priv(dev);
- hsr_ethhdr = (struct hsr_ethhdr *) skb->data;
-
- if ((skb->protocol != htons(ETH_P_PRP)) ||
- (hsr_ethhdr->ethhdr.h_proto != htons(ETH_P_PRP))) {
- hsr_fill_tag(hsr_ethhdr, hsr_priv);
- skb->protocol = htons(ETH_P_PRP);
- }
-
- skb2 = pskb_copy(skb, GFP_ATOMIC);
-
- res1 = NET_XMIT_DROP;
- if (likely(hsr_priv->slave[HSR_DEV_SLAVE_A]))
- res1 = slave_xmit(skb, hsr_priv, HSR_DEV_SLAVE_A);
+ struct hsr_priv *hsr = netdev_priv(dev);
+ struct hsr_port *master;
- res2 = NET_XMIT_DROP;
- if (likely(skb2 && hsr_priv->slave[HSR_DEV_SLAVE_B]))
- res2 = slave_xmit(skb2, hsr_priv, HSR_DEV_SLAVE_B);
-
- if (likely(res1 == NET_XMIT_SUCCESS || res1 == NET_XMIT_CN ||
- res2 == NET_XMIT_SUCCESS || res2 == NET_XMIT_CN)) {
- hsr_priv->dev->stats.tx_packets++;
- hsr_priv->dev->stats.tx_bytes += skb->len;
- } else {
- hsr_priv->dev->stats.tx_dropped++;
- }
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+ skb->dev = master->dev;
+ hsr_forward_skb(skb, master);
return NETDEV_TX_OK;
}
-static int hsr_header_create(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *daddr,
- const void *saddr, unsigned int len)
-{
- int res;
-
- /* Make room for the HSR tag now. We will fill it in later (in
- * hsr_dev_xmit)
- */
- if (skb_headroom(skb) < HSR_TAGLEN + ETH_HLEN)
- return -ENOBUFS;
- skb_push(skb, HSR_TAGLEN);
-
- /* To allow VLAN/HSR combos we should probably use
- * res = dev_hard_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN);
- * here instead. It would require other changes too, though - e.g.
- * separate headers for each slave etc...
- */
- res = eth_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN);
- if (res <= 0)
- return res;
- skb_reset_mac_header(skb);
-
- return res + HSR_TAGLEN;
-}
-
-
static const struct header_ops hsr_header_ops = {
- .create = hsr_header_create,
+ .create = eth_header,
.parse = eth_header_parse,
};
@@ -291,67 +256,63 @@ static const struct header_ops hsr_header_ops = {
*/
static int hsr_pad(int size)
{
- const int min_size = ETH_ZLEN - HSR_TAGLEN - ETH_HLEN;
+ const int min_size = ETH_ZLEN - HSR_HLEN - ETH_HLEN;
if (size >= min_size)
return size;
return min_size;
}
-static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type)
+static void send_hsr_supervision_frame(struct hsr_port *master, u8 type)
{
- struct hsr_priv *hsr_priv;
struct sk_buff *skb;
int hlen, tlen;
struct hsr_sup_tag *hsr_stag;
struct hsr_sup_payload *hsr_sp;
unsigned long irqflags;
- hlen = LL_RESERVED_SPACE(hsr_dev);
- tlen = hsr_dev->needed_tailroom;
+ hlen = LL_RESERVED_SPACE(master->dev);
+ tlen = master->dev->needed_tailroom;
skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen,
GFP_ATOMIC);
if (skb == NULL)
return;
- hsr_priv = netdev_priv(hsr_dev);
-
skb_reserve(skb, hlen);
- skb->dev = hsr_dev;
+ skb->dev = master->dev;
skb->protocol = htons(ETH_P_PRP);
skb->priority = TC_PRIO_CONTROL;
if (dev_hard_header(skb, skb->dev, ETH_P_PRP,
- hsr_priv->sup_multicast_addr,
- skb->dev->dev_addr, skb->len) < 0)
+ master->hsr->sup_multicast_addr,
+ skb->dev->dev_addr, skb->len) <= 0)
goto out;
+ skb_reset_mac_header(skb);
- skb_pull(skb, sizeof(struct ethhdr));
- hsr_stag = (typeof(hsr_stag)) skb->data;
+ hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(*hsr_stag));
set_hsr_stag_path(hsr_stag, 0xf);
set_hsr_stag_HSR_Ver(hsr_stag, 0);
- spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags);
- hsr_stag->sequence_nr = htons(hsr_priv->sequence_nr);
- hsr_priv->sequence_nr++;
- spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags);
+ spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
+ hsr_stag->sequence_nr = htons(master->hsr->sequence_nr);
+ master->hsr->sequence_nr++;
+ spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
hsr_stag->HSR_TLV_Type = type;
hsr_stag->HSR_TLV_Length = 12;
- skb_push(skb, sizeof(struct ethhdr));
-
/* Payload: MacAddressA */
hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp));
- ether_addr_copy(hsr_sp->MacAddressA, hsr_dev->dev_addr);
+ ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr);
- dev_queue_xmit(skb);
+ hsr_forward_skb(skb, master);
return;
out:
+ WARN_ON_ONCE("HSR: Could not send supervision frame\n");
kfree_skb(skb);
}
@@ -360,59 +321,32 @@ out:
*/
static void hsr_announce(unsigned long data)
{
- struct hsr_priv *hsr_priv;
+ struct hsr_priv *hsr;
+ struct hsr_port *master;
- hsr_priv = (struct hsr_priv *) data;
+ hsr = (struct hsr_priv *) data;
- if (hsr_priv->announce_count < 3) {
- send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_ANNOUNCE);
- hsr_priv->announce_count++;
+ rcu_read_lock();
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+
+ if (hsr->announce_count < 3) {
+ send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE);
+ hsr->announce_count++;
} else {
- send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_LIFE_CHECK);
+ send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK);
}
- if (hsr_priv->announce_count < 3)
- hsr_priv->announce_timer.expires = jiffies +
+ if (hsr->announce_count < 3)
+ hsr->announce_timer.expires = jiffies +
msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
else
- hsr_priv->announce_timer.expires = jiffies +
+ hsr->announce_timer.expires = jiffies +
msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
- if (is_admin_up(hsr_priv->dev))
- add_timer(&hsr_priv->announce_timer);
-}
-
-
-static void restore_slaves(struct net_device *hsr_dev)
-{
- struct hsr_priv *hsr_priv;
- int i;
- int res;
-
- hsr_priv = netdev_priv(hsr_dev);
-
- rtnl_lock();
-
- /* Restore promiscuity */
- for (i = 0; i < HSR_MAX_SLAVE; i++) {
- if (!hsr_priv->slave[i])
- continue;
- res = dev_set_promiscuity(hsr_priv->slave[i], -1);
- if (res)
- netdev_info(hsr_dev,
- "Cannot restore slave promiscuity (%s, %d)\n",
- hsr_priv->slave[i]->name, res);
- }
-
- rtnl_unlock();
-}
-
-static void reclaim_hsr_dev(struct rcu_head *rh)
-{
- struct hsr_priv *hsr_priv;
+ if (is_admin_up(master->dev))
+ add_timer(&hsr->announce_timer);
- hsr_priv = container_of(rh, struct hsr_priv, rcu_head);
- free_netdev(hsr_priv->dev);
+ rcu_read_unlock();
}
@@ -421,14 +355,18 @@ static void reclaim_hsr_dev(struct rcu_head *rh)
*/
static void hsr_dev_destroy(struct net_device *hsr_dev)
{
- struct hsr_priv *hsr_priv;
+ struct hsr_priv *hsr;
+ struct hsr_port *port;
- hsr_priv = netdev_priv(hsr_dev);
+ hsr = netdev_priv(hsr_dev);
+ hsr_for_each_port(hsr, port)
+ hsr_del_port(port);
- del_timer(&hsr_priv->announce_timer);
- unregister_hsr_master(hsr_priv); /* calls list_del_rcu on hsr_priv */
- restore_slaves(hsr_dev);
- call_rcu(&hsr_priv->rcu_head, reclaim_hsr_dev); /* reclaim hsr_priv */
+ del_timer_sync(&hsr->prune_timer);
+ del_timer_sync(&hsr->announce_timer);
+
+ synchronize_rcu();
+ free_netdev(hsr_dev);
}
static const struct net_device_ops hsr_device_ops = {
@@ -436,62 +374,51 @@ static const struct net_device_ops hsr_device_ops = {
.ndo_open = hsr_dev_open,
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
+ .ndo_fix_features = hsr_fix_features,
};
+static struct device_type hsr_type = {
+ .name = "hsr",
+};
void hsr_dev_setup(struct net_device *dev)
{
random_ether_addr(dev->dev_addr);
ether_setup(dev);
- dev->header_ops = &hsr_header_ops;
- dev->netdev_ops = &hsr_device_ops;
- dev->tx_queue_len = 0;
+ dev->header_ops = &hsr_header_ops;
+ dev->netdev_ops = &hsr_device_ops;
+ SET_NETDEV_DEVTYPE(dev, &hsr_type);
+ dev->tx_queue_len = 0;
dev->destructor = hsr_dev_destroy;
+
+ dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+ NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
+ NETIF_F_HW_VLAN_CTAG_TX;
+
+ dev->features = dev->hw_features;
+
+ /* Prevent recursive tx locking */
+ dev->features |= NETIF_F_LLTX;
+ /* VLAN on top of HSR needs testing and probably some work on
+ * hsr_header_create() etc.
+ */
+ dev->features |= NETIF_F_VLAN_CHALLENGED;
+ /* Not sure about this. Taken from bridge code. netdev_features.h says
+ * it means "Does not change network namespaces".
+ */
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
/* Return true if dev is a HSR master; return false otherwise.
*/
-bool is_hsr_master(struct net_device *dev)
+inline bool is_hsr_master(struct net_device *dev)
{
return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit);
}
-static int check_slave_ok(struct net_device *dev)
-{
- /* Don't allow HSR on non-ethernet like devices */
- if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) ||
- (dev->addr_len != ETH_ALEN)) {
- netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n");
- return -EINVAL;
- }
-
- /* Don't allow enslaving hsr devices */
- if (is_hsr_master(dev)) {
- netdev_info(dev, "Cannot create trees of HSR devices.\n");
- return -EINVAL;
- }
-
- if (is_hsr_slave(dev)) {
- netdev_info(dev, "This device is already a HSR slave.\n");
- return -EINVAL;
- }
-
- if (dev->priv_flags & IFF_802_1Q_VLAN) {
- netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n");
- return -EINVAL;
- }
-
- /* HSR over bonded devices has not been tested, but I'm not sure it
- * won't work...
- */
-
- return 0;
-}
-
-
/* Default multicast address for HSR Supervision frames */
static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
0x01, 0x15, 0x4e, 0x00, 0x01, 0x00
@@ -500,97 +427,74 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
unsigned char multicast_spec)
{
- struct hsr_priv *hsr_priv;
- int i;
+ struct hsr_priv *hsr;
+ struct hsr_port *port;
int res;
- hsr_priv = netdev_priv(hsr_dev);
- hsr_priv->dev = hsr_dev;
- INIT_LIST_HEAD(&hsr_priv->node_db);
- INIT_LIST_HEAD(&hsr_priv->self_node_db);
- for (i = 0; i < HSR_MAX_SLAVE; i++)
- hsr_priv->slave[i] = slave[i];
-
- spin_lock_init(&hsr_priv->seqnr_lock);
- /* Overflow soon to find bugs easier: */
- hsr_priv->sequence_nr = USHRT_MAX - 1024;
-
- init_timer(&hsr_priv->announce_timer);
- hsr_priv->announce_timer.function = hsr_announce;
- hsr_priv->announce_timer.data = (unsigned long) hsr_priv;
+ hsr = netdev_priv(hsr_dev);
+ INIT_LIST_HEAD(&hsr->ports);
+ INIT_LIST_HEAD(&hsr->node_db);
+ INIT_LIST_HEAD(&hsr->self_node_db);
- ether_addr_copy(hsr_priv->sup_multicast_addr, def_multicast_addr);
- hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
+ ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
-/* FIXME: should I modify the value of these?
- *
- * - hsr_dev->flags - i.e.
- * IFF_MASTER/SLAVE?
- * - hsr_dev->priv_flags - i.e.
- * IFF_EBRIDGE?
- * IFF_TX_SKB_SHARING?
- * IFF_HSR_MASTER/SLAVE?
- */
+ /* Make sure we recognize frames from ourselves in hsr_rcv() */
+ res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr,
+ slave[1]->dev_addr);
+ if (res < 0)
+ return res;
- for (i = 0; i < HSR_MAX_SLAVE; i++) {
- res = check_slave_ok(slave[i]);
- if (res)
- return res;
- }
+ spin_lock_init(&hsr->seqnr_lock);
+ /* Overflow soon to find bugs easier: */
+ hsr->sequence_nr = HSR_SEQNR_START;
- hsr_dev->features = slave[0]->features & slave[1]->features;
- /* Prevent recursive tx locking */
- hsr_dev->features |= NETIF_F_LLTX;
- /* VLAN on top of HSR needs testing and probably some work on
- * hsr_header_create() etc.
- */
- hsr_dev->features |= NETIF_F_VLAN_CHALLENGED;
+ init_timer(&hsr->announce_timer);
+ hsr->announce_timer.function = hsr_announce;
+ hsr->announce_timer.data = (unsigned long) hsr;
- /* Set hsr_dev's MAC address to that of mac_slave1 */
- ether_addr_copy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr);
+ init_timer(&hsr->prune_timer);
+ hsr->prune_timer.function = hsr_prune_nodes;
+ hsr->prune_timer.data = (unsigned long) hsr;
- /* Set required header length */
- for (i = 0; i < HSR_MAX_SLAVE; i++) {
- if (slave[i]->hard_header_len + HSR_TAGLEN >
- hsr_dev->hard_header_len)
- hsr_dev->hard_header_len =
- slave[i]->hard_header_len + HSR_TAGLEN;
- }
+ ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
+ hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
- /* MTU */
- for (i = 0; i < HSR_MAX_SLAVE; i++)
- if (slave[i]->mtu - HSR_TAGLEN < hsr_dev->mtu)
- hsr_dev->mtu = slave[i]->mtu - HSR_TAGLEN;
+ /* FIXME: should I modify the value of these?
+ *
+ * - hsr_dev->flags - i.e.
+ * IFF_MASTER/SLAVE?
+ * - hsr_dev->priv_flags - i.e.
+ * IFF_EBRIDGE?
+ * IFF_TX_SKB_SHARING?
+ * IFF_HSR_MASTER/SLAVE?
+ */
/* Make sure the 1st call to netif_carrier_on() gets through */
netif_carrier_off(hsr_dev);
- /* Promiscuity */
- for (i = 0; i < HSR_MAX_SLAVE; i++) {
- res = dev_set_promiscuity(slave[i], 1);
- if (res) {
- netdev_info(hsr_dev, "Cannot set slave promiscuity (%s, %d)\n",
- slave[i]->name, res);
- goto fail;
- }
- }
+ res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
+ if (res)
+ return res;
- /* Make sure we recognize frames from ourselves in hsr_rcv() */
- res = hsr_create_self_node(&hsr_priv->self_node_db,
- hsr_dev->dev_addr,
- hsr_priv->slave[1]->dev_addr);
- if (res < 0)
+ res = register_netdevice(hsr_dev);
+ if (res)
goto fail;
- res = register_netdevice(hsr_dev);
+ res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A);
+ if (res)
+ goto fail;
+ res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B);
if (res)
goto fail;
- register_hsr_master(hsr_priv);
+ hsr->prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD);
+ add_timer(&hsr->prune_timer);
return 0;
fail:
- restore_slaves(hsr_dev);
+ hsr_for_each_port(hsr, port)
+ hsr_del_port(port);
+
return res;
}
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index 2c7148e73914..108a5d59d2a6 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,7 +6,7 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*/
#ifndef __HSR_DEVICE_H
@@ -18,12 +18,8 @@
void hsr_dev_setup(struct net_device *dev);
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
unsigned char multicast_spec);
-void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1,
- struct net_device *slave2);
-void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1,
- struct net_device *slave2);
-void hsr_check_announce(struct net_device *hsr_dev, int old_operstate);
+void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
bool is_hsr_master(struct net_device *dev);
-int hsr_get_max_mtu(struct hsr_priv *hsr_priv);
+int hsr_get_max_mtu(struct hsr_priv *hsr);
#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
new file mode 100644
index 000000000000..7871ed6d3825
--- /dev/null
+++ b/net/hsr/hsr_forward.c
@@ -0,0 +1,368 @@
+/* Copyright 2011-2014 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ */
+
+#include "hsr_forward.h"
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include "hsr_main.h"
+#include "hsr_framereg.h"
+
+
+struct hsr_node;
+
+struct hsr_frame_info {
+ struct sk_buff *skb_std;
+ struct sk_buff *skb_hsr;
+ struct hsr_port *port_rcv;
+ struct hsr_node *node_src;
+ u16 sequence_nr;
+ bool is_supervision;
+ bool is_vlan;
+ bool is_local_dest;
+ bool is_local_exclusive;
+};
+
+
+/* The uses I can see for these HSR supervision frames are:
+ * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type =
+ * 22") to reset any sequence_nr counters belonging to that node. Useful if
+ * the other node's counter has been reset for some reason.
+ * --
+ * Or not - resetting the counter and bridging the frame would create a
+ * loop, unfortunately.
+ *
+ * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck
+ * frame is received from a particular node, we know something is wrong.
+ * We just register these (as with normal frames) and throw them away.
+ *
+ * 3) Allow different MAC addresses for the two slave interfaces, using the
+ * MacAddressA field.
+ */
+static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
+{
+ struct hsr_ethhdr_sp *hdr;
+
+ WARN_ON_ONCE(!skb_mac_header_was_set(skb));
+ hdr = (struct hsr_ethhdr_sp *) skb_mac_header(skb);
+
+ if (!ether_addr_equal(hdr->ethhdr.h_dest,
+ hsr->sup_multicast_addr))
+ return false;
+
+ if (get_hsr_stag_path(&hdr->hsr_sup) != 0x0f)
+ return false;
+ if ((hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_ANNOUNCE) &&
+ (hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_LIFE_CHECK))
+ return false;
+ if (hdr->hsr_sup.HSR_TLV_Length != 12)
+ return false;
+
+ return true;
+}
+
+
+static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in,
+ struct hsr_frame_info *frame)
+{
+ struct sk_buff *skb;
+ int copylen;
+ unsigned char *dst, *src;
+
+ skb_pull(skb_in, HSR_HLEN);
+ skb = __pskb_copy(skb_in, skb_headroom(skb_in) - HSR_HLEN, GFP_ATOMIC);
+ skb_push(skb_in, HSR_HLEN);
+ if (skb == NULL)
+ return NULL;
+
+ skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ skb->csum_start -= HSR_HLEN;
+
+ copylen = 2*ETH_ALEN;
+ if (frame->is_vlan)
+ copylen += VLAN_HLEN;
+ src = skb_mac_header(skb_in);
+ dst = skb_mac_header(skb);
+ memcpy(dst, src, copylen);
+
+ skb->protocol = eth_hdr(skb)->h_proto;
+ return skb;
+}
+
+static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame,
+ struct hsr_port *port)
+{
+ if (!frame->skb_std)
+ frame->skb_std = create_stripped_skb(frame->skb_hsr, frame);
+ return skb_clone(frame->skb_std, GFP_ATOMIC);
+}
+
+
+static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame,
+ struct hsr_port *port)
+{
+ struct hsr_ethhdr *hsr_ethhdr;
+ int lane_id;
+ int lsdu_size;
+
+ if (port->type == HSR_PT_SLAVE_A)
+ lane_id = 0;
+ else
+ lane_id = 1;
+
+ lsdu_size = skb->len - 14;
+ if (frame->is_vlan)
+ lsdu_size -= 4;
+
+ hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb);
+
+ set_hsr_tag_path(&hsr_ethhdr->hsr_tag, lane_id);
+ set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size);
+ hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr);
+ hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
+ hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP);
+}
+
+static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o,
+ struct hsr_frame_info *frame,
+ struct hsr_port *port)
+{
+ int movelen;
+ unsigned char *dst, *src;
+ struct sk_buff *skb;
+
+ /* Create the new skb with enough headroom to fit the HSR tag */
+ skb = __pskb_copy(skb_o, skb_headroom(skb_o) + HSR_HLEN, GFP_ATOMIC);
+ if (skb == NULL)
+ return NULL;
+ skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ skb->csum_start += HSR_HLEN;
+
+ movelen = ETH_HLEN;
+ if (frame->is_vlan)
+ movelen += VLAN_HLEN;
+
+ src = skb_mac_header(skb);
+ dst = skb_push(skb, HSR_HLEN);
+ memmove(dst, src, movelen);
+ skb_reset_mac_header(skb);
+
+ hsr_fill_tag(skb, frame, port);
+
+ return skb;
+}
+
+/* If the original frame was an HSR tagged frame, just clone it to be sent
+ * unchanged. Otherwise, create a private frame especially tagged for 'port'.
+ */
+static struct sk_buff *frame_get_tagged_skb(struct hsr_frame_info *frame,
+ struct hsr_port *port)
+{
+ if (frame->skb_hsr)
+ return skb_clone(frame->skb_hsr, GFP_ATOMIC);
+
+ if ((port->type != HSR_PT_SLAVE_A) && (port->type != HSR_PT_SLAVE_B)) {
+ WARN_ONCE(1, "HSR: Bug: trying to create a tagged frame for a non-ring port");
+ return NULL;
+ }
+
+ return create_tagged_skb(frame->skb_std, frame, port);
+}
+
+
+static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
+ struct hsr_node *node_src)
+{
+ bool was_multicast_frame;
+ int res;
+
+ was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST);
+ hsr_addr_subst_source(node_src, skb);
+ skb_pull(skb, ETH_HLEN);
+ res = netif_rx(skb);
+ if (res == NET_RX_DROP) {
+ dev->stats.rx_dropped++;
+ } else {
+ dev->stats.rx_packets++;
+ dev->stats.rx_bytes += skb->len;
+ if (was_multicast_frame)
+ dev->stats.multicast++;
+ }
+}
+
+static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port,
+ struct hsr_frame_info *frame)
+{
+ if (frame->port_rcv->type == HSR_PT_MASTER) {
+ hsr_addr_subst_dest(frame->node_src, skb, port);
+
+ /* Address substitution (IEC62439-3 pp 26, 50): replace mac
+ * address of outgoing frame with that of the outgoing slave's.
+ */
+ ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr);
+ }
+ return dev_queue_xmit(skb);
+}
+
+
+/* Forward the frame through all devices except:
+ * - Back through the receiving device
+ * - If it's a HSR frame: through a device where it has passed before
+ * - To the local HSR master only if the frame is directly addressed to it, or
+ * a non-supervision multicast or broadcast frame.
+ *
+ * HSR slave devices should insert a HSR tag into the frame, or forward the
+ * frame unchanged if it's already tagged. Interlink devices should strip HSR
+ * tags if they're of the non-HSR type (but only after duplicate discard). The
+ * master device always strips HSR tags.
+ */
+static void hsr_forward_do(struct hsr_frame_info *frame)
+{
+ struct hsr_port *port;
+ struct sk_buff *skb;
+
+ hsr_for_each_port(frame->port_rcv->hsr, port) {
+ /* Don't send frame back the way it came */
+ if (port == frame->port_rcv)
+ continue;
+
+ /* Don't deliver locally unless we should */
+ if ((port->type == HSR_PT_MASTER) && !frame->is_local_dest)
+ continue;
+
+ /* Deliver frames directly addressed to us to master only */
+ if ((port->type != HSR_PT_MASTER) && frame->is_local_exclusive)
+ continue;
+
+ /* Don't send frame over port where it has been sent before */
+ if (hsr_register_frame_out(port, frame->node_src,
+ frame->sequence_nr))
+ continue;
+
+ if (frame->is_supervision && (port->type == HSR_PT_MASTER)) {
+ hsr_handle_sup_frame(frame->skb_hsr,
+ frame->node_src,
+ frame->port_rcv);
+ continue;
+ }
+
+ if (port->type != HSR_PT_MASTER)
+ skb = frame_get_tagged_skb(frame, port);
+ else
+ skb = frame_get_stripped_skb(frame, port);
+ if (skb == NULL) {
+ /* FIXME: Record the dropped frame? */
+ continue;
+ }
+
+ skb->dev = port->dev;
+ if (port->type == HSR_PT_MASTER)
+ hsr_deliver_master(skb, port->dev, frame->node_src);
+ else
+ hsr_xmit(skb, port, frame);
+ }
+}
+
+
+static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb,
+ struct hsr_frame_info *frame)
+{
+ struct net_device *master_dev;
+
+ master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev;
+
+ if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) {
+ frame->is_local_exclusive = true;
+ skb->pkt_type = PACKET_HOST;
+ } else {
+ frame->is_local_exclusive = false;
+ }
+
+ if ((skb->pkt_type == PACKET_HOST) ||
+ (skb->pkt_type == PACKET_MULTICAST) ||
+ (skb->pkt_type == PACKET_BROADCAST)) {
+ frame->is_local_dest = true;
+ } else {
+ frame->is_local_dest = false;
+ }
+}
+
+
+static int hsr_fill_frame_info(struct hsr_frame_info *frame,
+ struct sk_buff *skb, struct hsr_port *port)
+{
+ struct ethhdr *ethhdr;
+ unsigned long irqflags;
+
+ frame->is_supervision = is_supervision_frame(port->hsr, skb);
+ frame->node_src = hsr_get_node(&port->hsr->node_db, skb,
+ frame->is_supervision);
+ if (frame->node_src == NULL)
+ return -1; /* Unknown node and !is_supervision, or no mem */
+
+ ethhdr = (struct ethhdr *) skb_mac_header(skb);
+ frame->is_vlan = false;
+ if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
+ frame->is_vlan = true;
+ /* FIXME: */
+ WARN_ONCE(1, "HSR: VLAN not yet supported");
+ }
+ if (ethhdr->h_proto == htons(ETH_P_PRP)) {
+ frame->skb_std = NULL;
+ frame->skb_hsr = skb;
+ frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
+ } else {
+ frame->skb_std = skb;
+ frame->skb_hsr = NULL;
+ /* Sequence nr for the master node */
+ spin_lock_irqsave(&port->hsr->seqnr_lock, irqflags);
+ frame->sequence_nr = port->hsr->sequence_nr;
+ port->hsr->sequence_nr++;
+ spin_unlock_irqrestore(&port->hsr->seqnr_lock, irqflags);
+ }
+
+ frame->port_rcv = port;
+ check_local_dest(port->hsr, skb, frame);
+
+ return 0;
+}
+
+/* Must be called holding rcu read lock (because of the port parameter) */
+void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
+{
+ struct hsr_frame_info frame;
+
+ if (skb_mac_header(skb) != skb->data) {
+ WARN_ONCE(1, "%s:%d: Malformed frame (port_src %s)\n",
+ __FILE__, __LINE__, port->dev->name);
+ goto out_drop;
+ }
+
+ if (hsr_fill_frame_info(&frame, skb, port) < 0)
+ goto out_drop;
+ hsr_register_frame_in(frame.node_src, port, frame.sequence_nr);
+ hsr_forward_do(&frame);
+
+ if (frame.skb_hsr != NULL)
+ kfree_skb(frame.skb_hsr);
+ if (frame.skb_std != NULL)
+ kfree_skb(frame.skb_std);
+ return;
+
+out_drop:
+ port->dev->stats.tx_dropped++;
+ kfree_skb(skb);
+}
diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h
new file mode 100644
index 000000000000..5c5bc4b6b75f
--- /dev/null
+++ b/net/hsr/hsr_forward.h
@@ -0,0 +1,20 @@
+/* Copyright 2011-2014 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ */
+
+#ifndef __HSR_FORWARD_H
+#define __HSR_FORWARD_H
+
+#include <linux/netdevice.h>
+#include "hsr_main.h"
+
+void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port);
+
+#endif /* __HSR_FORWARD_H */
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 83e58449366a..bace124d14ef 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,7 +6,7 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*
* The HSR spec says never to forward the same frame twice on the same
* interface. A frame is identified by its source MAC address and its HSR
@@ -23,71 +23,68 @@
#include "hsr_netlink.h"
-struct node_entry {
- struct list_head mac_list;
- unsigned char MacAddressA[ETH_ALEN];
- unsigned char MacAddressB[ETH_ALEN];
- enum hsr_dev_idx AddrB_if; /* The local slave through which AddrB
- * frames are received from this node
- */
- unsigned long time_in[HSR_MAX_SLAVE];
- bool time_in_stale[HSR_MAX_SLAVE];
- u16 seq_out[HSR_MAX_DEV];
- struct rcu_head rcu_head;
+struct hsr_node {
+ struct list_head mac_list;
+ unsigned char MacAddressA[ETH_ALEN];
+ unsigned char MacAddressB[ETH_ALEN];
+ /* Local slave through which AddrB frames are received from this node */
+ enum hsr_port_type AddrB_port;
+ unsigned long time_in[HSR_PT_PORTS];
+ bool time_in_stale[HSR_PT_PORTS];
+ u16 seq_out[HSR_PT_PORTS];
+ struct rcu_head rcu_head;
};
-/* TODO: use hash lists for mac addresses (linux/jhash.h)? */
+/* TODO: use hash lists for mac addresses (linux/jhash.h)? */
-/* Search for mac entry. Caller must hold rcu read lock.
+/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
+ * false otherwise.
*/
-static struct node_entry *find_node_by_AddrA(struct list_head *node_db,
- const unsigned char addr[ETH_ALEN])
+static bool seq_nr_after(u16 a, u16 b)
{
- struct node_entry *node;
-
- list_for_each_entry_rcu(node, node_db, mac_list) {
- if (ether_addr_equal(node->MacAddressA, addr))
- return node;
- }
+ /* Remove inconsistency where
+ * seq_nr_after(a, b) == seq_nr_before(a, b)
+ */
+ if ((int) b - a == 32768)
+ return false;
- return NULL;
+ return (((s16) (b - a)) < 0);
}
+#define seq_nr_before(a, b) seq_nr_after((b), (a))
+#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b)))
+#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
-/* Search for mac entry. Caller must hold rcu read lock.
- */
-static struct node_entry *find_node_by_AddrB(struct list_head *node_db,
- const unsigned char addr[ETH_ALEN])
+bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
- struct node_entry *node;
+ struct hsr_node *node;
- list_for_each_entry_rcu(node, node_db, mac_list) {
- if (ether_addr_equal(node->MacAddressB, addr))
- return node;
+ node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node,
+ mac_list);
+ if (!node) {
+ WARN_ONCE(1, "HSR: No self node\n");
+ return false;
}
- return NULL;
-}
+ if (ether_addr_equal(addr, node->MacAddressA))
+ return true;
+ if (ether_addr_equal(addr, node->MacAddressB))
+ return true;
+ return false;
+}
/* Search for mac entry. Caller must hold rcu read lock.
*/
-struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb)
+static struct hsr_node *find_node_by_AddrA(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN])
{
- struct node_entry *node;
- struct ethhdr *ethhdr;
-
- if (!skb_mac_header_was_set(skb))
- return NULL;
-
- ethhdr = (struct ethhdr *) skb_mac_header(skb);
+ struct hsr_node *node;
list_for_each_entry_rcu(node, node_db, mac_list) {
- if (ether_addr_equal(node->MacAddressA, ethhdr->h_source))
- return node;
- if (ether_addr_equal(node->MacAddressB, ethhdr->h_source))
+ if (ether_addr_equal(node->MacAddressA, addr))
return node;
}
@@ -102,7 +99,7 @@ int hsr_create_self_node(struct list_head *self_node_db,
unsigned char addr_a[ETH_ALEN],
unsigned char addr_b[ETH_ALEN])
{
- struct node_entry *node, *oldnode;
+ struct hsr_node *node, *oldnode;
node = kmalloc(sizeof(*node), GFP_KERNEL);
if (!node)
@@ -113,7 +110,7 @@ int hsr_create_self_node(struct list_head *self_node_db,
rcu_read_lock();
oldnode = list_first_or_null_rcu(self_node_db,
- struct node_entry, mac_list);
+ struct hsr_node, mac_list);
if (oldnode) {
list_replace_rcu(&oldnode->mac_list, &node->mac_list);
rcu_read_unlock();
@@ -128,135 +125,144 @@ int hsr_create_self_node(struct list_head *self_node_db,
}
-/* Add/merge node to the database of nodes. 'skb' must contain an HSR
- * supervision frame.
- * - If the supervision header's MacAddressA field is not yet in the database,
- * this frame is from an hitherto unknown node - add it to the database.
- * - If the sender's MAC address is not the same as its MacAddressA address,
- * the node is using PICS_SUBS (address substitution). Record the sender's
- * address as the node's MacAddressB.
- *
- * This function needs to work even if the sender node has changed one of its
- * slaves' MAC addresses. In this case, there are four different cases described
- * by (Addr-changed, received-from) pairs as follows. Note that changing the
- * SlaveA address is equal to changing the node's own address:
- *
- * - (AddrB, SlaveB): The new AddrB will be recorded by PICS_SUBS code since
- * node == NULL.
- * - (AddrB, SlaveA): Will work as usual (the AddrB change won't be detected
- * from this frame).
- *
- * - (AddrA, SlaveB): The old node will be found. We need to detect this and
- * remove the node.
- * - (AddrA, SlaveA): A new node will be registered (non-PICS_SUBS at first).
- * The old one will be pruned after HSR_NODE_FORGET_TIME.
- *
- * We also need to detect if the sender's SlaveA and SlaveB cables have been
- * swapped.
+/* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA;
+ * seq_out is used to initialize filtering of outgoing duplicate frames
+ * originating from the newly added node.
*/
-struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
- struct node_entry *node,
- struct sk_buff *skb,
- enum hsr_dev_idx dev_idx)
+struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
+ u16 seq_out)
{
- struct hsr_sup_payload *hsr_sp;
- struct hsr_ethhdr_sp *hsr_ethsup;
- int i;
+ struct hsr_node *node;
unsigned long now;
-
- hsr_ethsup = (struct hsr_ethhdr_sp *) skb_mac_header(skb);
- hsr_sp = (struct hsr_sup_payload *) skb->data;
-
- if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) {
- /* Node has changed its AddrA, frame was received from SlaveB */
- list_del_rcu(&node->mac_list);
- kfree_rcu(node, rcu_head);
- node = NULL;
- }
-
- if (node && (dev_idx == node->AddrB_if) &&
- !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) {
- /* Cables have been swapped */
- list_del_rcu(&node->mac_list);
- kfree_rcu(node, rcu_head);
- node = NULL;
- }
-
- if (node && (dev_idx != node->AddrB_if) &&
- (node->AddrB_if != HSR_DEV_NONE) &&
- !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) {
- /* Cables have been swapped */
- list_del_rcu(&node->mac_list);
- kfree_rcu(node, rcu_head);
- node = NULL;
- }
-
- if (node)
- return node;
-
- node = find_node_by_AddrA(&hsr_priv->node_db, hsr_sp->MacAddressA);
- if (node) {
- /* Node is known, but frame was received from an unknown
- * address. Node is PICS_SUBS capable; merge its AddrB.
- */
- ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source);
- node->AddrB_if = dev_idx;
- return node;
- }
+ int i;
node = kzalloc(sizeof(*node), GFP_ATOMIC);
if (!node)
return NULL;
- ether_addr_copy(node->MacAddressA, hsr_sp->MacAddressA);
- ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source);
- if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source))
- node->AddrB_if = dev_idx;
- else
- node->AddrB_if = HSR_DEV_NONE;
+ ether_addr_copy(node->MacAddressA, addr);
/* We are only interested in time diffs here, so use current jiffies
* as initialization. (0 could trigger an spurious ring error warning).
*/
now = jiffies;
- for (i = 0; i < HSR_MAX_SLAVE; i++)
+ for (i = 0; i < HSR_PT_PORTS; i++)
node->time_in[i] = now;
- for (i = 0; i < HSR_MAX_DEV; i++)
- node->seq_out[i] = ntohs(hsr_ethsup->hsr_sup.sequence_nr) - 1;
+ for (i = 0; i < HSR_PT_PORTS; i++)
+ node->seq_out[i] = seq_out;
- list_add_tail_rcu(&node->mac_list, &hsr_priv->node_db);
+ list_add_tail_rcu(&node->mac_list, node_db);
return node;
}
+/* Get the hsr_node from which 'skb' was sent.
+ */
+struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+ bool is_sup)
+{
+ struct hsr_node *node;
+ struct ethhdr *ethhdr;
+ u16 seq_out;
+
+ if (!skb_mac_header_was_set(skb))
+ return NULL;
+
+ ethhdr = (struct ethhdr *) skb_mac_header(skb);
+
+ list_for_each_entry_rcu(node, node_db, mac_list) {
+ if (ether_addr_equal(node->MacAddressA, ethhdr->h_source))
+ return node;
+ if (ether_addr_equal(node->MacAddressB, ethhdr->h_source))
+ return node;
+ }
+
+ if (!is_sup)
+ return NULL; /* Only supervision frame may create node entry */
+
+ if (ethhdr->h_proto == htons(ETH_P_PRP)) {
+ /* Use the existing sequence_nr from the tag as starting point
+ * for filtering duplicate frames.
+ */
+ seq_out = hsr_get_skb_sequence_nr(skb) - 1;
+ } else {
+ WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
+ seq_out = 0;
+ }
+
+ return hsr_add_node(node_db, ethhdr->h_source, seq_out);
+}
+
+/* Use the Supervision frame's info about an eventual MacAddressB for merging
+ * nodes that has previously had their MacAddressB registered as a separate
+ * node.
+ */
+void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+ struct hsr_port *port_rcv)
+{
+ struct hsr_node *node_real;
+ struct hsr_sup_payload *hsr_sp;
+ struct list_head *node_db;
+ int i;
+
+ skb_pull(skb, sizeof(struct hsr_ethhdr_sp));
+ hsr_sp = (struct hsr_sup_payload *) skb->data;
+
+ if (ether_addr_equal(eth_hdr(skb)->h_source, hsr_sp->MacAddressA))
+ /* Not sent from MacAddressB of a PICS_SUBS capable node */
+ goto done;
+
+ /* Merge node_curr (registered on MacAddressB) into node_real */
+ node_db = &port_rcv->hsr->node_db;
+ node_real = find_node_by_AddrA(node_db, hsr_sp->MacAddressA);
+ if (!node_real)
+ /* No frame received from AddrA of this node yet */
+ node_real = hsr_add_node(node_db, hsr_sp->MacAddressA,
+ HSR_SEQNR_START - 1);
+ if (!node_real)
+ goto done; /* No mem */
+ if (node_real == node_curr)
+ /* Node has already been merged */
+ goto done;
+
+ ether_addr_copy(node_real->MacAddressB, eth_hdr(skb)->h_source);
+ for (i = 0; i < HSR_PT_PORTS; i++) {
+ if (!node_curr->time_in_stale[i] &&
+ time_after(node_curr->time_in[i], node_real->time_in[i])) {
+ node_real->time_in[i] = node_curr->time_in[i];
+ node_real->time_in_stale[i] = node_curr->time_in_stale[i];
+ }
+ if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i]))
+ node_real->seq_out[i] = node_curr->seq_out[i];
+ }
+ node_real->AddrB_port = port_rcv->type;
+
+ list_del_rcu(&node_curr->mac_list);
+ kfree_rcu(node_curr, rcu_head);
+
+done:
+ skb_push(skb, sizeof(struct hsr_ethhdr_sp));
+}
+
/* 'skb' is a frame meant for this host, that is to be passed to upper layers.
*
- * If the frame was sent by a node's B interface, replace the sender
+ * If the frame was sent by a node's B interface, replace the source
* address with that node's "official" address (MacAddressA) so that upper
* layers recognize where it came from.
*/
-void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb)
+void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb)
{
- struct ethhdr *ethhdr;
- struct node_entry *node;
-
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: Mac header not set\n", __func__);
return;
}
- ethhdr = (struct ethhdr *) skb_mac_header(skb);
- rcu_read_lock();
- node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source);
- if (node)
- ether_addr_copy(ethhdr->h_source, node->MacAddressA);
- rcu_read_unlock();
+ memcpy(&eth_hdr(skb)->h_source, node->MacAddressA, ETH_ALEN);
}
-
/* 'skb' is a frame meant for another host.
- * 'hsr_dev_idx' is the HSR index of the outgoing device
+ * 'port' is the outgoing interface
*
* Substitute the target (dest) MAC address if necessary, so the it matches the
* recipient interface MAC address, regardless of whether that is the
@@ -264,47 +270,44 @@ void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb)
* This is needed to keep the packets flowing through switches that learn on
* which "side" the different interfaces are.
*/
-void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
- enum hsr_dev_idx dev_idx)
+void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
+ struct hsr_port *port)
{
- struct node_entry *node;
+ struct hsr_node *node_dst;
- rcu_read_lock();
- node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest);
- if (node && (node->AddrB_if == dev_idx))
- ether_addr_copy(ethhdr->h_dest, node->MacAddressB);
- rcu_read_unlock();
-}
+ if (!skb_mac_header_was_set(skb)) {
+ WARN_ONCE(1, "%s: Mac header not set\n", __func__);
+ return;
+ }
+ if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest))
+ return;
-/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
- * false otherwise.
- */
-static bool seq_nr_after(u16 a, u16 b)
-{
- /* Remove inconsistency where
- * seq_nr_after(a, b) == seq_nr_before(a, b)
- */
- if ((int) b - a == 32768)
- return false;
+ node_dst = find_node_by_AddrA(&port->hsr->node_db, eth_hdr(skb)->h_dest);
+ if (!node_dst) {
+ WARN_ONCE(1, "%s: Unknown node\n", __func__);
+ return;
+ }
+ if (port->type != node_dst->AddrB_port)
+ return;
- return (((s16) (b - a)) < 0);
+ ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->MacAddressB);
}
-#define seq_nr_before(a, b) seq_nr_after((b), (a))
-#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b)))
-#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
-void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx)
+void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
+ u16 sequence_nr)
{
- if ((dev_idx < 0) || (dev_idx >= HSR_MAX_SLAVE)) {
- WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
+ /* Don't register incoming frames without a valid sequence number. This
+ * ensures entries of restarted nodes gets pruned so that they can
+ * re-register and resume communications.
+ */
+ if (seq_nr_before(sequence_nr, node->seq_out[port->type]))
return;
- }
- node->time_in[dev_idx] = jiffies;
- node->time_in_stale[dev_idx] = false;
-}
+ node->time_in[port->type] = jiffies;
+ node->time_in_stale[port->type] = false;
+}
/* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid
* ethhdr->h_source address and skb->mac_header set.
@@ -314,102 +317,87 @@ void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx)
* 0 otherwise, or
* negative error code on error
*/
-int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx,
- struct sk_buff *skb)
+int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
+ u16 sequence_nr)
{
- struct hsr_ethhdr *hsr_ethhdr;
- u16 sequence_nr;
-
- if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) {
- WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
- return -EINVAL;
- }
- if (!skb_mac_header_was_set(skb)) {
- WARN_ONCE(1, "%s: Mac header not set\n", __func__);
- return -EINVAL;
- }
- hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb);
-
- sequence_nr = ntohs(hsr_ethhdr->hsr_tag.sequence_nr);
- if (seq_nr_before_or_eq(sequence_nr, node->seq_out[dev_idx]))
+ if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]))
return 1;
- node->seq_out[dev_idx] = sequence_nr;
+ node->seq_out[port->type] = sequence_nr;
return 0;
}
-
-static bool is_late(struct node_entry *node, enum hsr_dev_idx dev_idx)
+static struct hsr_port *get_late_port(struct hsr_priv *hsr,
+ struct hsr_node *node)
{
- enum hsr_dev_idx other;
-
- if (node->time_in_stale[dev_idx])
- return true;
-
- if (dev_idx == HSR_DEV_SLAVE_A)
- other = HSR_DEV_SLAVE_B;
- else
- other = HSR_DEV_SLAVE_A;
-
- if (node->time_in_stale[other])
- return false;
+ if (node->time_in_stale[HSR_PT_SLAVE_A])
+ return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
+ if (node->time_in_stale[HSR_PT_SLAVE_B])
+ return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
+
+ if (time_after(node->time_in[HSR_PT_SLAVE_B],
+ node->time_in[HSR_PT_SLAVE_A] +
+ msecs_to_jiffies(MAX_SLAVE_DIFF)))
+ return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
+ if (time_after(node->time_in[HSR_PT_SLAVE_A],
+ node->time_in[HSR_PT_SLAVE_B] +
+ msecs_to_jiffies(MAX_SLAVE_DIFF)))
+ return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
- if (time_after(node->time_in[other], node->time_in[dev_idx] +
- msecs_to_jiffies(MAX_SLAVE_DIFF)))
- return true;
-
- return false;
+ return NULL;
}
/* Remove stale sequence_nr records. Called by timer every
* HSR_LIFE_CHECK_INTERVAL (two seconds or so).
*/
-void hsr_prune_nodes(struct hsr_priv *hsr_priv)
+void hsr_prune_nodes(unsigned long data)
{
- struct node_entry *node;
+ struct hsr_priv *hsr;
+ struct hsr_node *node;
+ struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
+ hsr = (struct hsr_priv *) data;
+
rcu_read_lock();
- list_for_each_entry_rcu(node, &hsr_priv->node_db, mac_list) {
+ list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
/* Shorthand */
- time_a = node->time_in[HSR_DEV_SLAVE_A];
- time_b = node->time_in[HSR_DEV_SLAVE_B];
+ time_a = node->time_in[HSR_PT_SLAVE_A];
+ time_b = node->time_in[HSR_PT_SLAVE_B];
/* Check for timestamps old enough to risk wrap-around */
if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2))
- node->time_in_stale[HSR_DEV_SLAVE_A] = true;
+ node->time_in_stale[HSR_PT_SLAVE_A] = true;
if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2))
- node->time_in_stale[HSR_DEV_SLAVE_B] = true;
+ node->time_in_stale[HSR_PT_SLAVE_B] = true;
/* Get age of newest frame from node.
* At least one time_in is OK here; nodes get pruned long
* before both time_ins can get stale
*/
timestamp = time_a;
- if (node->time_in_stale[HSR_DEV_SLAVE_A] ||
- (!node->time_in_stale[HSR_DEV_SLAVE_B] &&
+ if (node->time_in_stale[HSR_PT_SLAVE_A] ||
+ (!node->time_in_stale[HSR_PT_SLAVE_B] &&
time_after(time_b, time_a)))
timestamp = time_b;
/* Warn of ring error only as long as we get frames at all */
if (time_is_after_jiffies(timestamp +
msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) {
-
- if (is_late(node, HSR_DEV_SLAVE_A))
- hsr_nl_ringerror(hsr_priv, node->MacAddressA,
- HSR_DEV_SLAVE_A);
- else if (is_late(node, HSR_DEV_SLAVE_B))
- hsr_nl_ringerror(hsr_priv, node->MacAddressA,
- HSR_DEV_SLAVE_B);
+ rcu_read_lock();
+ port = get_late_port(hsr, node);
+ if (port != NULL)
+ hsr_nl_ringerror(hsr, node->MacAddressA, port);
+ rcu_read_unlock();
}
/* Prune old entries */
if (time_is_before_jiffies(timestamp +
msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
- hsr_nl_nodedown(hsr_priv, node->MacAddressA);
+ hsr_nl_nodedown(hsr, node->MacAddressA);
list_del_rcu(&node->mac_list);
/* Note that we need to free this entry later: */
kfree_rcu(node, rcu_head);
@@ -419,21 +407,21 @@ void hsr_prune_nodes(struct hsr_priv *hsr_priv)
}
-void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
+void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
- struct node_entry *node;
+ struct hsr_node *node;
if (!_pos) {
- node = list_first_or_null_rcu(&hsr_priv->node_db,
- struct node_entry, mac_list);
+ node = list_first_or_null_rcu(&hsr->node_db,
+ struct hsr_node, mac_list);
if (node)
ether_addr_copy(addr, node->MacAddressA);
return node;
}
node = _pos;
- list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) {
+ list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) {
ether_addr_copy(addr, node->MacAddressA);
return node;
}
@@ -442,7 +430,7 @@ void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
}
-int hsr_get_node_data(struct hsr_priv *hsr_priv,
+int hsr_get_node_data(struct hsr_priv *hsr,
const unsigned char *addr,
unsigned char addr_b[ETH_ALEN],
unsigned int *addr_b_ifindex,
@@ -451,12 +439,13 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv,
int *if2_age,
u16 *if2_seq)
{
- struct node_entry *node;
+ struct hsr_node *node;
+ struct hsr_port *port;
unsigned long tdiff;
rcu_read_lock();
- node = find_node_by_AddrA(&hsr_priv->node_db, addr);
+ node = find_node_by_AddrA(&hsr->node_db, addr);
if (!node) {
rcu_read_unlock();
return -ENOENT; /* No such entry */
@@ -464,8 +453,8 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv,
ether_addr_copy(addr_b, node->MacAddressB);
- tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A];
- if (node->time_in_stale[HSR_DEV_SLAVE_A])
+ tdiff = jiffies - node->time_in[HSR_PT_SLAVE_A];
+ if (node->time_in_stale[HSR_PT_SLAVE_A])
*if1_age = INT_MAX;
#if HZ <= MSEC_PER_SEC
else if (tdiff > msecs_to_jiffies(INT_MAX))
@@ -474,8 +463,8 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv,
else
*if1_age = jiffies_to_msecs(tdiff);
- tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_B];
- if (node->time_in_stale[HSR_DEV_SLAVE_B])
+ tdiff = jiffies - node->time_in[HSR_PT_SLAVE_B];
+ if (node->time_in_stale[HSR_PT_SLAVE_B])
*if2_age = INT_MAX;
#if HZ <= MSEC_PER_SEC
else if (tdiff > msecs_to_jiffies(INT_MAX))
@@ -485,13 +474,15 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv,
*if2_age = jiffies_to_msecs(tdiff);
/* Present sequence numbers as if they were incoming on interface */
- *if1_seq = node->seq_out[HSR_DEV_SLAVE_B];
- *if2_seq = node->seq_out[HSR_DEV_SLAVE_A];
+ *if1_seq = node->seq_out[HSR_PT_SLAVE_B];
+ *if2_seq = node->seq_out[HSR_PT_SLAVE_A];
- if ((node->AddrB_if != HSR_DEV_NONE) && hsr_priv->slave[node->AddrB_if])
- *addr_b_ifindex = hsr_priv->slave[node->AddrB_if]->ifindex;
- else
+ if (node->AddrB_port != HSR_PT_NONE) {
+ port = hsr_port_get_hsr(hsr, node->AddrB_port);
+ *addr_b_ifindex = port->dev->ifindex;
+ } else {
*addr_b_ifindex = -1;
+ }
rcu_read_unlock();
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index e6c4022030ad..438b40f98f5a 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,42 +6,43 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*/
-#ifndef _HSR_FRAMEREG_H
-#define _HSR_FRAMEREG_H
+#ifndef __HSR_FRAMEREG_H
+#define __HSR_FRAMEREG_H
#include "hsr_main.h"
-struct node_entry;
+struct hsr_node;
-struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb);
+struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
+ u16 seq_out);
+struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+ bool is_sup);
+void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+ struct hsr_port *port);
+bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr);
-struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
- struct node_entry *node,
- struct sk_buff *skb,
- enum hsr_dev_idx dev_idx);
+void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb);
+void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
+ struct hsr_port *port);
-void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb);
-void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
- enum hsr_dev_idx dev_idx);
+void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
+ u16 sequence_nr);
+int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
+ u16 sequence_nr);
-void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx);
-
-int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx,
- struct sk_buff *skb);
-
-void hsr_prune_nodes(struct hsr_priv *hsr_priv);
+void hsr_prune_nodes(unsigned long data);
int hsr_create_self_node(struct list_head *self_node_db,
unsigned char addr_a[ETH_ALEN],
unsigned char addr_b[ETH_ALEN]);
-void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
+void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN]);
-int hsr_get_node_data(struct hsr_priv *hsr_priv,
+int hsr_get_node_data(struct hsr_priv *hsr,
const unsigned char *addr,
unsigned char addr_b[ETH_ALEN],
unsigned int *addr_b_ifindex,
@@ -50,4 +51,4 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv,
int *if2_age,
u16 *if2_seq);
-#endif /* _HSR_FRAMEREG_H */
+#endif /* __HSR_FRAMEREG_H */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 3fee5218a691..779d28b65417 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,11 +6,7 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
- *
- * In addition to routines for registering and unregistering HSR support, this
- * file also contains the receive routine that handles all incoming frames with
- * Ethertype (protocol) ETH_P_PRP (HSRv0), and network device event handling.
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*/
#include <linux/netdevice.h>
@@ -21,154 +17,71 @@
#include "hsr_device.h"
#include "hsr_netlink.h"
#include "hsr_framereg.h"
-
-
-/* List of all registered virtual HSR devices */
-static LIST_HEAD(hsr_list);
-
-void register_hsr_master(struct hsr_priv *hsr_priv)
-{
- list_add_tail_rcu(&hsr_priv->hsr_list, &hsr_list);
-}
-
-void unregister_hsr_master(struct hsr_priv *hsr_priv)
-{
- struct hsr_priv *hsr_priv_it;
-
- list_for_each_entry(hsr_priv_it, &hsr_list, hsr_list)
- if (hsr_priv_it == hsr_priv) {
- list_del_rcu(&hsr_priv_it->hsr_list);
- return;
- }
-}
-
-bool is_hsr_slave(struct net_device *dev)
-{
- struct hsr_priv *hsr_priv_it;
-
- list_for_each_entry_rcu(hsr_priv_it, &hsr_list, hsr_list) {
- if (dev == hsr_priv_it->slave[0])
- return true;
- if (dev == hsr_priv_it->slave[1])
- return true;
- }
-
- return false;
-}
-
-
-/* If dev is a HSR slave device, return the virtual master device. Return NULL
- * otherwise.
- */
-static struct hsr_priv *get_hsr_master(struct net_device *dev)
-{
- struct hsr_priv *hsr_priv;
-
- rcu_read_lock();
- list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list)
- if ((dev == hsr_priv->slave[0]) ||
- (dev == hsr_priv->slave[1])) {
- rcu_read_unlock();
- return hsr_priv;
- }
-
- rcu_read_unlock();
- return NULL;
-}
-
-
-/* If dev is a HSR slave device, return the other slave device. Return NULL
- * otherwise.
- */
-static struct net_device *get_other_slave(struct hsr_priv *hsr_priv,
- struct net_device *dev)
-{
- if (dev == hsr_priv->slave[0])
- return hsr_priv->slave[1];
- if (dev == hsr_priv->slave[1])
- return hsr_priv->slave[0];
-
- return NULL;
-}
+#include "hsr_slave.h"
static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
void *ptr)
{
- struct net_device *slave, *other_slave;
- struct hsr_priv *hsr_priv;
- int old_operstate;
+ struct net_device *dev;
+ struct hsr_port *port, *master;
+ struct hsr_priv *hsr;
int mtu_max;
int res;
- struct net_device *dev;
dev = netdev_notifier_info_to_dev(ptr);
-
- hsr_priv = get_hsr_master(dev);
- if (hsr_priv) {
- /* dev is a slave device */
- slave = dev;
- other_slave = get_other_slave(hsr_priv, slave);
- } else {
+ port = hsr_port_get_rtnl(dev);
+ if (port == NULL) {
if (!is_hsr_master(dev))
- return NOTIFY_DONE;
- hsr_priv = netdev_priv(dev);
- slave = hsr_priv->slave[0];
- other_slave = hsr_priv->slave[1];
+ return NOTIFY_DONE; /* Not an HSR device */
+ hsr = netdev_priv(dev);
+ port = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+ } else {
+ hsr = port->hsr;
}
switch (event) {
case NETDEV_UP: /* Administrative state DOWN */
case NETDEV_DOWN: /* Administrative state UP */
case NETDEV_CHANGE: /* Link (carrier) state changes */
- old_operstate = hsr_priv->dev->operstate;
- hsr_set_carrier(hsr_priv->dev, slave, other_slave);
- /* netif_stacked_transfer_operstate() cannot be used here since
- * it doesn't set IF_OPER_LOWERLAYERDOWN (?)
- */
- hsr_set_operstate(hsr_priv->dev, slave, other_slave);
- hsr_check_announce(hsr_priv->dev, old_operstate);
+ hsr_check_carrier_and_operstate(hsr);
break;
case NETDEV_CHANGEADDR:
-
- /* This should not happen since there's no ndo_set_mac_address()
- * for HSR devices - i.e. not supported.
- */
- if (dev == hsr_priv->dev)
+ if (port->type == HSR_PT_MASTER) {
+ /* This should not happen since there's no
+ * ndo_set_mac_address() for HSR devices - i.e. not
+ * supported.
+ */
break;
+ }
- if (dev == hsr_priv->slave[0])
- ether_addr_copy(hsr_priv->dev->dev_addr,
- hsr_priv->slave[0]->dev_addr);
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+
+ if (port->type == HSR_PT_SLAVE_A) {
+ ether_addr_copy(master->dev->dev_addr, dev->dev_addr);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev);
+ }
/* Make sure we recognize frames from ourselves in hsr_rcv() */
- res = hsr_create_self_node(&hsr_priv->self_node_db,
- hsr_priv->dev->dev_addr,
- hsr_priv->slave[1] ?
- hsr_priv->slave[1]->dev_addr :
- hsr_priv->dev->dev_addr);
+ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
+ res = hsr_create_self_node(&hsr->self_node_db,
+ master->dev->dev_addr,
+ port ?
+ port->dev->dev_addr :
+ master->dev->dev_addr);
if (res)
- netdev_warn(hsr_priv->dev,
+ netdev_warn(master->dev,
"Could not update HSR node address.\n");
-
- if (dev == hsr_priv->slave[0])
- call_netdevice_notifiers(NETDEV_CHANGEADDR, hsr_priv->dev);
break;
case NETDEV_CHANGEMTU:
- if (dev == hsr_priv->dev)
+ if (port->type == HSR_PT_MASTER)
break; /* Handled in ndo_change_mtu() */
- mtu_max = hsr_get_max_mtu(hsr_priv);
- if (hsr_priv->dev->mtu > mtu_max)
- dev_set_mtu(hsr_priv->dev, mtu_max);
+ mtu_max = hsr_get_max_mtu(port->hsr);
+ master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
+ master->dev->mtu = mtu_max;
break;
case NETDEV_UNREGISTER:
- if (dev == hsr_priv->slave[0])
- hsr_priv->slave[0] = NULL;
- if (dev == hsr_priv->slave[1])
- hsr_priv->slave[1] = NULL;
-
- /* There should really be a way to set a new slave device... */
-
+ hsr_del_port(port);
break;
case NETDEV_PRE_TYPE_CHANGE:
/* HSR works only on Ethernet devices. Refuse slave to change
@@ -181,255 +94,16 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
}
-static struct timer_list prune_timer;
-
-static void prune_nodes_all(unsigned long data)
-{
- struct hsr_priv *hsr_priv;
-
- rcu_read_lock();
- list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list)
- hsr_prune_nodes(hsr_priv);
- rcu_read_unlock();
-
- prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD);
- add_timer(&prune_timer);
-}
-
-
-static struct sk_buff *hsr_pull_tag(struct sk_buff *skb)
+struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt)
{
- struct hsr_tag *hsr_tag;
- struct sk_buff *skb2;
-
- skb2 = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(!skb2))
- goto err_free;
- skb = skb2;
-
- if (unlikely(!pskb_may_pull(skb, HSR_TAGLEN)))
- goto err_free;
+ struct hsr_port *port;
- hsr_tag = (struct hsr_tag *) skb->data;
- skb->protocol = hsr_tag->encap_proto;
- skb_pull(skb, HSR_TAGLEN);
-
- return skb;
-
-err_free:
- kfree_skb(skb);
+ hsr_for_each_port(hsr, port)
+ if (port->type == pt)
+ return port;
return NULL;
}
-
-/* The uses I can see for these HSR supervision frames are:
- * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type =
- * 22") to reset any sequence_nr counters belonging to that node. Useful if
- * the other node's counter has been reset for some reason.
- * --
- * Or not - resetting the counter and bridging the frame would create a
- * loop, unfortunately.
- *
- * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck
- * frame is received from a particular node, we know something is wrong.
- * We just register these (as with normal frames) and throw them away.
- *
- * 3) Allow different MAC addresses for the two slave interfaces, using the
- * MacAddressA field.
- */
-static bool is_supervision_frame(struct hsr_priv *hsr_priv, struct sk_buff *skb)
-{
- struct hsr_sup_tag *hsr_stag;
-
- if (!ether_addr_equal(eth_hdr(skb)->h_dest,
- hsr_priv->sup_multicast_addr))
- return false;
-
- hsr_stag = (struct hsr_sup_tag *) skb->data;
- if (get_hsr_stag_path(hsr_stag) != 0x0f)
- return false;
- if ((hsr_stag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) &&
- (hsr_stag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK))
- return false;
- if (hsr_stag->HSR_TLV_Length != 12)
- return false;
-
- return true;
-}
-
-
-/* Implementation somewhat according to IEC-62439-3, p. 43
- */
-static int hsr_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
-{
- struct hsr_priv *hsr_priv;
- struct net_device *other_slave;
- struct node_entry *node;
- bool deliver_to_self;
- struct sk_buff *skb_deliver;
- enum hsr_dev_idx dev_in_idx, dev_other_idx;
- bool dup_out;
- int ret;
-
- hsr_priv = get_hsr_master(dev);
-
- if (!hsr_priv) {
- /* Non-HSR-slave device 'dev' is connected to a HSR network */
- kfree_skb(skb);
- dev->stats.rx_errors++;
- return NET_RX_SUCCESS;
- }
-
- if (dev == hsr_priv->slave[0]) {
- dev_in_idx = HSR_DEV_SLAVE_A;
- dev_other_idx = HSR_DEV_SLAVE_B;
- } else {
- dev_in_idx = HSR_DEV_SLAVE_B;
- dev_other_idx = HSR_DEV_SLAVE_A;
- }
-
- node = hsr_find_node(&hsr_priv->self_node_db, skb);
- if (node) {
- /* Always kill frames sent by ourselves */
- kfree_skb(skb);
- return NET_RX_SUCCESS;
- }
-
- /* Is this frame a candidate for local reception? */
- deliver_to_self = false;
- if ((skb->pkt_type == PACKET_HOST) ||
- (skb->pkt_type == PACKET_MULTICAST) ||
- (skb->pkt_type == PACKET_BROADCAST))
- deliver_to_self = true;
- else if (ether_addr_equal(eth_hdr(skb)->h_dest,
- hsr_priv->dev->dev_addr)) {
- skb->pkt_type = PACKET_HOST;
- deliver_to_self = true;
- }
-
-
- rcu_read_lock(); /* node_db */
- node = hsr_find_node(&hsr_priv->node_db, skb);
-
- if (is_supervision_frame(hsr_priv, skb)) {
- skb_pull(skb, sizeof(struct hsr_sup_tag));
- node = hsr_merge_node(hsr_priv, node, skb, dev_in_idx);
- if (!node) {
- rcu_read_unlock(); /* node_db */
- kfree_skb(skb);
- hsr_priv->dev->stats.rx_dropped++;
- return NET_RX_DROP;
- }
- skb_push(skb, sizeof(struct hsr_sup_tag));
- deliver_to_self = false;
- }
-
- if (!node) {
- /* Source node unknown; this might be a HSR frame from
- * another net (different multicast address). Ignore it.
- */
- rcu_read_unlock(); /* node_db */
- kfree_skb(skb);
- return NET_RX_SUCCESS;
- }
-
- /* Register ALL incoming frames as outgoing through the other interface.
- * This allows us to register frames as incoming only if they are valid
- * for the receiving interface, without using a specific counter for
- * incoming frames.
- */
- dup_out = hsr_register_frame_out(node, dev_other_idx, skb);
- if (!dup_out)
- hsr_register_frame_in(node, dev_in_idx);
-
- /* Forward this frame? */
- if (!dup_out && (skb->pkt_type != PACKET_HOST))
- other_slave = get_other_slave(hsr_priv, dev);
- else
- other_slave = NULL;
-
- if (hsr_register_frame_out(node, HSR_DEV_MASTER, skb))
- deliver_to_self = false;
-
- rcu_read_unlock(); /* node_db */
-
- if (!deliver_to_self && !other_slave) {
- kfree_skb(skb);
- /* Circulated frame; silently remove it. */
- return NET_RX_SUCCESS;
- }
-
- skb_deliver = skb;
- if (deliver_to_self && other_slave) {
- /* skb_clone() is not enough since we will strip the hsr tag
- * and do address substitution below
- */
- skb_deliver = pskb_copy(skb, GFP_ATOMIC);
- if (!skb_deliver) {
- deliver_to_self = false;
- hsr_priv->dev->stats.rx_dropped++;
- }
- }
-
- if (deliver_to_self) {
- bool multicast_frame;
-
- skb_deliver = hsr_pull_tag(skb_deliver);
- if (!skb_deliver) {
- hsr_priv->dev->stats.rx_dropped++;
- goto forward;
- }
-#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- /* Move everything in the header that is after the HSR tag,
- * to work around alignment problems caused by the 6-byte HSR
- * tag. In practice, this removes/overwrites the HSR tag in
- * the header and restores a "standard" packet.
- */
- memmove(skb_deliver->data - HSR_TAGLEN, skb_deliver->data,
- skb_headlen(skb_deliver));
-
- /* Adjust skb members so they correspond with the move above.
- * This cannot possibly underflow skb->data since hsr_pull_tag()
- * above succeeded.
- * At this point in the protocol stack, the transport and
- * network headers have not been set yet, and we haven't touched
- * the mac header nor the head. So we only need to adjust data
- * and tail:
- */
- skb_deliver->data -= HSR_TAGLEN;
- skb_deliver->tail -= HSR_TAGLEN;
-#endif
- skb_deliver->dev = hsr_priv->dev;
- hsr_addr_subst_source(hsr_priv, skb_deliver);
- multicast_frame = (skb_deliver->pkt_type == PACKET_MULTICAST);
- ret = netif_rx(skb_deliver);
- if (ret == NET_RX_DROP) {
- hsr_priv->dev->stats.rx_dropped++;
- } else {
- hsr_priv->dev->stats.rx_packets++;
- hsr_priv->dev->stats.rx_bytes += skb->len;
- if (multicast_frame)
- hsr_priv->dev->stats.multicast++;
- }
- }
-
-forward:
- if (other_slave) {
- skb_push(skb, ETH_HLEN);
- skb->dev = other_slave;
- dev_queue_xmit(skb);
- }
-
- return NET_RX_SUCCESS;
-}
-
-
-static struct packet_type hsr_pt __read_mostly = {
- .type = htons(ETH_P_PRP),
- .func = hsr_rcv,
-};
-
static struct notifier_block hsr_nb = {
.notifier_call = hsr_netdev_notify, /* Slave event notifications */
};
@@ -439,18 +113,9 @@ static int __init hsr_init(void)
{
int res;
- BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_TAGLEN);
-
- dev_add_pack(&hsr_pt);
-
- init_timer(&prune_timer);
- prune_timer.function = prune_nodes_all;
- prune_timer.data = 0;
- prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD);
- add_timer(&prune_timer);
+ BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN);
register_netdevice_notifier(&hsr_nb);
-
res = hsr_netlink_init();
return res;
@@ -459,9 +124,7 @@ static int __init hsr_init(void)
static void __exit hsr_exit(void)
{
unregister_netdevice_notifier(&hsr_nb);
- del_timer_sync(&prune_timer);
hsr_netlink_exit();
- dev_remove_pack(&hsr_pt);
}
module_init(hsr_init);
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 56fe060c0ab1..5a9c69962ded 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,11 +6,11 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*/
-#ifndef _HSR_PRIVATE_H
-#define _HSR_PRIVATE_H
+#ifndef __HSR_PRIVATE_H
+#define __HSR_PRIVATE_H
#include <linux/netdevice.h>
#include <linux/list.h>
@@ -29,6 +29,7 @@
* each node differ before we notify of communication problem?
*/
#define MAX_SLAVE_DIFF 3000 /* ms */
+#define HSR_SEQNR_START (USHRT_MAX - 1024)
/* How often shall we check for broken ring and remove node entries older than
@@ -46,16 +47,16 @@
* path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest,
* h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr,
* encapsulated protocol } instead.
+ *
+ * Field names as defined in the IEC:2010 standard for HSR.
*/
-#define HSR_TAGLEN 6
-
-/* Field names below as defined in the IEC:2010 standard for HSR. */
struct hsr_tag {
__be16 path_and_LSDU_size;
__be16 sequence_nr;
__be16 encap_proto;
} __packed;
+#define HSR_HLEN 6
/* The helper functions below assumes that 'path' occupies the 4 most
* significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or
@@ -136,31 +137,47 @@ struct hsr_ethhdr_sp {
} __packed;
-enum hsr_dev_idx {
- HSR_DEV_NONE = -1,
- HSR_DEV_SLAVE_A = 0,
- HSR_DEV_SLAVE_B,
- HSR_DEV_MASTER,
+enum hsr_port_type {
+ HSR_PT_NONE = 0, /* Must be 0, used by framereg */
+ HSR_PT_SLAVE_A,
+ HSR_PT_SLAVE_B,
+ HSR_PT_INTERLINK,
+ HSR_PT_MASTER,
+ HSR_PT_PORTS, /* This must be the last item in the enum */
+};
+
+struct hsr_port {
+ struct list_head port_list;
+ struct net_device *dev;
+ struct hsr_priv *hsr;
+ enum hsr_port_type type;
};
-#define HSR_MAX_SLAVE (HSR_DEV_SLAVE_B + 1)
-#define HSR_MAX_DEV (HSR_DEV_MASTER + 1)
struct hsr_priv {
- struct list_head hsr_list; /* List of hsr devices */
struct rcu_head rcu_head;
- struct net_device *dev;
- struct net_device *slave[HSR_MAX_SLAVE];
- struct list_head node_db; /* Other HSR nodes */
+ struct list_head ports;
+ struct list_head node_db; /* Known HSR nodes */
struct list_head self_node_db; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
+ struct timer_list prune_timer;
int announce_count;
u16 sequence_nr;
spinlock_t seqnr_lock; /* locking for sequence_nr */
unsigned char sup_multicast_addr[ETH_ALEN];
};
-void register_hsr_master(struct hsr_priv *hsr_priv);
-void unregister_hsr_master(struct hsr_priv *hsr_priv);
-bool is_hsr_slave(struct net_device *dev);
+#define hsr_for_each_port(hsr, port) \
+ list_for_each_entry_rcu((port), &(hsr)->ports, port_list)
+
+struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt);
+
+/* Caller must ensure skb is a valid HSR frame */
+static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
+{
+ struct hsr_ethhdr *hsr_ethhdr;
+
+ hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb);
+ return ntohs(hsr_ethhdr->hsr_tag.sequence_nr);
+}
-#endif /* _HSR_PRIVATE_H */
+#endif /* __HSR_PRIVATE_H */
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 01a5261ac7a5..a2c7e4c0ac1e 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,7 +6,7 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*
* Routines for handling Netlink messages for HSR.
*/
@@ -37,13 +37,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
struct net_device *link[2];
unsigned char multicast_spec;
+ if (!data) {
+ netdev_info(dev, "HSR: No slave devices specified\n");
+ return -EINVAL;
+ }
if (!data[IFLA_HSR_SLAVE1]) {
- netdev_info(dev, "IFLA_HSR_SLAVE1 missing!\n");
+ netdev_info(dev, "HSR: Slave1 device not specified\n");
return -EINVAL;
}
link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1]));
if (!data[IFLA_HSR_SLAVE2]) {
- netdev_info(dev, "IFLA_HSR_SLAVE2 missing!\n");
+ netdev_info(dev, "HSR: Slave2 device not specified\n");
return -EINVAL;
}
link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2]));
@@ -63,21 +67,33 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct hsr_priv *hsr_priv;
+ struct hsr_priv *hsr;
+ struct hsr_port *port;
+ int res;
- hsr_priv = netdev_priv(dev);
+ hsr = netdev_priv(dev);
- if (hsr_priv->slave[0])
- if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex))
- goto nla_put_failure;
+ res = 0;
- if (hsr_priv->slave[1])
- if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex))
- goto nla_put_failure;
+ rcu_read_lock();
+ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
+ if (port)
+ res = nla_put_u32(skb, IFLA_HSR_SLAVE1, port->dev->ifindex);
+ rcu_read_unlock();
+ if (res)
+ goto nla_put_failure;
+
+ rcu_read_lock();
+ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
+ if (port)
+ res = nla_put_u32(skb, IFLA_HSR_SLAVE2, port->dev->ifindex);
+ rcu_read_unlock();
+ if (res)
+ goto nla_put_failure;
if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN,
- hsr_priv->sup_multicast_addr) ||
- nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr))
+ hsr->sup_multicast_addr) ||
+ nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr))
goto nla_put_failure;
return 0;
@@ -128,13 +144,13 @@ static const struct genl_multicast_group hsr_mcgrps[] = {
* over one of the slave interfaces. This would indicate an open network ring
* (i.e. a link has failed somewhere).
*/
-void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
- enum hsr_dev_idx dev_idx)
+void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN],
+ struct hsr_port *port)
{
struct sk_buff *skb;
void *msg_head;
+ struct hsr_port *master;
int res;
- int ifindex;
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb)
@@ -148,11 +164,7 @@ void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
if (res < 0)
goto nla_put_failure;
- if (hsr_priv->slave[dev_idx])
- ifindex = hsr_priv->slave[dev_idx]->ifindex;
- else
- ifindex = -1;
- res = nla_put_u32(skb, HSR_A_IFINDEX, ifindex);
+ res = nla_put_u32(skb, HSR_A_IFINDEX, port->dev->ifindex);
if (res < 0)
goto nla_put_failure;
@@ -165,16 +177,20 @@ nla_put_failure:
kfree_skb(skb);
fail:
- netdev_warn(hsr_priv->dev, "Could not send HSR ring error message\n");
+ rcu_read_lock();
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+ netdev_warn(master->dev, "Could not send HSR ring error message\n");
+ rcu_read_unlock();
}
/* This is called when we haven't heard from the node with MAC address addr for
* some time (just before the node is removed from the node table/list).
*/
-void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN])
+void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN])
{
struct sk_buff *skb;
void *msg_head;
+ struct hsr_port *master;
int res;
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -199,7 +215,10 @@ nla_put_failure:
kfree_skb(skb);
fail:
- netdev_warn(hsr_priv->dev, "Could not send HSR node down\n");
+ rcu_read_lock();
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+ netdev_warn(master->dev, "Could not send HSR node down\n");
+ rcu_read_unlock();
}
@@ -220,7 +239,8 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
/* For sending */
struct sk_buff *skb_out;
void *msg_head;
- struct hsr_priv *hsr_priv;
+ struct hsr_priv *hsr;
+ struct hsr_port *port;
unsigned char hsr_node_addr_b[ETH_ALEN];
int hsr_node_if1_age;
u16 hsr_node_if1_seq;
@@ -267,8 +287,8 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
if (res < 0)
goto nla_put_failure;
- hsr_priv = netdev_priv(hsr_dev);
- res = hsr_get_node_data(hsr_priv,
+ hsr = netdev_priv(hsr_dev);
+ res = hsr_get_node_data(hsr,
(unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]),
hsr_node_addr_b,
&addr_b_ifindex,
@@ -301,9 +321,12 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq);
if (res < 0)
goto nla_put_failure;
- if (hsr_priv->slave[0])
+ rcu_read_lock();
+ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
+ if (port)
res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX,
- hsr_priv->slave[0]->ifindex);
+ port->dev->ifindex);
+ rcu_read_unlock();
if (res < 0)
goto nla_put_failure;
@@ -313,9 +336,14 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq);
if (res < 0)
goto nla_put_failure;
- if (hsr_priv->slave[1])
+ rcu_read_lock();
+ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
+ if (port)
res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX,
- hsr_priv->slave[1]->ifindex);
+ port->dev->ifindex);
+ rcu_read_unlock();
+ if (res < 0)
+ goto nla_put_failure;
genlmsg_end(skb_out, msg_head);
genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
@@ -334,7 +362,7 @@ fail:
return res;
}
-/* Get a list of MacAddressA of all nodes known to this node (other than self).
+/* Get a list of MacAddressA of all nodes known to this node (including self).
*/
static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
{
@@ -345,7 +373,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
/* For sending */
struct sk_buff *skb_out;
void *msg_head;
- struct hsr_priv *hsr_priv;
+ struct hsr_priv *hsr;
void *pos;
unsigned char addr[ETH_ALEN];
int res;
@@ -385,17 +413,17 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
if (res < 0)
goto nla_put_failure;
- hsr_priv = netdev_priv(hsr_dev);
+ hsr = netdev_priv(hsr_dev);
rcu_read_lock();
- pos = hsr_get_next_node(hsr_priv, NULL, addr);
+ pos = hsr_get_next_node(hsr, NULL, addr);
while (pos) {
res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr);
if (res < 0) {
rcu_read_unlock();
goto nla_put_failure;
}
- pos = hsr_get_next_node(hsr_priv, pos, addr);
+ pos = hsr_get_next_node(hsr, pos, addr);
}
rcu_read_unlock();
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
index d4579dcc3c7d..3f6b95b5b6b8 100644
--- a/net/hsr/hsr_netlink.h
+++ b/net/hsr/hsr_netlink.h
@@ -1,4 +1,4 @@
-/* Copyright 2011-2013 Autronica Fire and Security AS
+/* Copyright 2011-2014 Autronica Fire and Security AS
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -6,7 +6,7 @@
* any later version.
*
* Author(s):
- * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*/
#ifndef __HSR_NETLINK_H
@@ -17,13 +17,14 @@
#include <uapi/linux/hsr_netlink.h>
struct hsr_priv;
+struct hsr_port;
int __init hsr_netlink_init(void);
void __exit hsr_netlink_exit(void);
-void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
- int dev_idx);
-void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]);
+void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN],
+ struct hsr_port *port);
+void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]);
void hsr_nl_framedrop(int dropcount, int dev_idx);
void hsr_nl_linkdown(int dev_idx);
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
new file mode 100644
index 000000000000..a348dcbcd683
--- /dev/null
+++ b/net/hsr/hsr_slave.c
@@ -0,0 +1,196 @@
+/* Copyright 2011-2014 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ */
+
+#include "hsr_slave.h"
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include "hsr_main.h"
+#include "hsr_device.h"
+#include "hsr_forward.h"
+#include "hsr_framereg.h"
+
+
+static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
+{
+ struct sk_buff *skb = *pskb;
+ struct hsr_port *port;
+
+ if (!skb_mac_header_was_set(skb)) {
+ WARN_ONCE(1, "%s: skb invalid", __func__);
+ return RX_HANDLER_PASS;
+ }
+
+ rcu_read_lock(); /* hsr->node_db, hsr->ports */
+ port = hsr_port_get_rcu(skb->dev);
+
+ if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) {
+ /* Directly kill frames sent by ourselves */
+ kfree_skb(skb);
+ goto finish_consume;
+ }
+
+ if (eth_hdr(skb)->h_proto != htons(ETH_P_PRP))
+ goto finish_pass;
+
+ skb_push(skb, ETH_HLEN);
+
+ hsr_forward_skb(skb, port);
+
+finish_consume:
+ rcu_read_unlock(); /* hsr->node_db, hsr->ports */
+ return RX_HANDLER_CONSUMED;
+
+finish_pass:
+ rcu_read_unlock(); /* hsr->node_db, hsr->ports */
+ return RX_HANDLER_PASS;
+}
+
+bool hsr_port_exists(const struct net_device *dev)
+{
+ return rcu_access_pointer(dev->rx_handler) == hsr_handle_frame;
+}
+
+
+static int hsr_check_dev_ok(struct net_device *dev)
+{
+ /* Don't allow HSR on non-ethernet like devices */
+ if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) ||
+ (dev->addr_len != ETH_ALEN)) {
+ netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n");
+ return -EINVAL;
+ }
+
+ /* Don't allow enslaving hsr devices */
+ if (is_hsr_master(dev)) {
+ netdev_info(dev, "Cannot create trees of HSR devices.\n");
+ return -EINVAL;
+ }
+
+ if (hsr_port_exists(dev)) {
+ netdev_info(dev, "This device is already a HSR slave.\n");
+ return -EINVAL;
+ }
+
+ if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n");
+ return -EINVAL;
+ }
+
+ if (dev->priv_flags & IFF_DONT_BRIDGE) {
+ netdev_info(dev, "This device does not support bridging.\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* HSR over bonded devices has not been tested, but I'm not sure it
+ * won't work...
+ */
+
+ return 0;
+}
+
+
+/* Setup device to be added to the HSR bridge. */
+static int hsr_portdev_setup(struct net_device *dev, struct hsr_port *port)
+{
+ int res;
+
+ dev_hold(dev);
+ res = dev_set_promiscuity(dev, 1);
+ if (res)
+ goto fail_promiscuity;
+
+ /* FIXME:
+ * What does net device "adjacency" mean? Should we do
+ * res = netdev_master_upper_dev_link(port->dev, port->hsr->dev); ?
+ */
+
+ res = netdev_rx_handler_register(dev, hsr_handle_frame, port);
+ if (res)
+ goto fail_rx_handler;
+ dev_disable_lro(dev);
+
+ return 0;
+
+fail_rx_handler:
+ dev_set_promiscuity(dev, -1);
+fail_promiscuity:
+ dev_put(dev);
+
+ return res;
+}
+
+int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
+ enum hsr_port_type type)
+{
+ struct hsr_port *port, *master;
+ int res;
+
+ if (type != HSR_PT_MASTER) {
+ res = hsr_check_dev_ok(dev);
+ if (res)
+ return res;
+ }
+
+ port = hsr_port_get_hsr(hsr, type);
+ if (port != NULL)
+ return -EBUSY; /* This port already exists */
+
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+ if (port == NULL)
+ return -ENOMEM;
+
+ if (type != HSR_PT_MASTER) {
+ res = hsr_portdev_setup(dev, port);
+ if (res)
+ goto fail_dev_setup;
+ }
+
+ port->hsr = hsr;
+ port->dev = dev;
+ port->type = type;
+
+ list_add_tail_rcu(&port->port_list, &hsr->ports);
+ synchronize_rcu();
+
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+ netdev_update_features(master->dev);
+ dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
+
+ return 0;
+
+fail_dev_setup:
+ kfree(port);
+ return res;
+}
+
+void hsr_del_port(struct hsr_port *port)
+{
+ struct hsr_priv *hsr;
+ struct hsr_port *master;
+
+ hsr = port->hsr;
+ master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
+ list_del_rcu(&port->port_list);
+
+ if (port != master) {
+ netdev_update_features(master->dev);
+ dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
+ netdev_rx_handler_unregister(port->dev);
+ dev_set_promiscuity(port->dev, -1);
+ }
+
+ /* FIXME?
+ * netdev_upper_dev_unlink(port->dev, port->hsr->dev);
+ */
+
+ synchronize_rcu();
+ dev_put(port->dev);
+}
diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h
new file mode 100644
index 000000000000..3ccfbf71c92e
--- /dev/null
+++ b/net/hsr/hsr_slave.h
@@ -0,0 +1,38 @@
+/* Copyright 2011-2014 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ */
+
+#ifndef __HSR_SLAVE_H
+#define __HSR_SLAVE_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include "hsr_main.h"
+
+int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
+ enum hsr_port_type pt);
+void hsr_del_port(struct hsr_port *port);
+bool hsr_port_exists(const struct net_device *dev);
+
+static inline struct hsr_port *hsr_port_get_rtnl(const struct net_device *dev)
+{
+ ASSERT_RTNL();
+ return hsr_port_exists(dev) ?
+ rtnl_dereference(dev->rx_handler_data) : NULL;
+}
+
+static inline struct hsr_port *hsr_port_get_rcu(const struct net_device *dev)
+{
+ return hsr_port_exists(dev) ?
+ rcu_dereference(dev->rx_handler_data) : NULL;
+}
+
+#endif /* __HSR_SLAVE_H */
diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index fe6bd7a71081..44136297b673 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -71,28 +71,42 @@ struct lowpan_dev_record {
struct list_head list;
};
+/* don't save pan id, it's intra pan */
+struct lowpan_addr {
+ u8 mode;
+ union {
+ /* IPv6 needs big endian here */
+ __be64 extended_addr;
+ __be16 short_addr;
+ } u;
+};
+
+struct lowpan_addr_info {
+ struct lowpan_addr daddr;
+ struct lowpan_addr saddr;
+};
+
static inline struct
lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
{
return netdev_priv(dev);
}
-static inline void lowpan_address_flip(u8 *src, u8 *dest)
+static inline struct
+lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb)
{
- int i;
- for (i = 0; i < IEEE802154_ADDR_LEN; i++)
- (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i];
+ WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info));
+ return (struct lowpan_addr_info *)(skb->data -
+ sizeof(struct lowpan_addr_info));
}
-static int lowpan_header_create(struct sk_buff *skb,
- struct net_device *dev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len)
+static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *_daddr,
+ const void *_saddr, unsigned int len)
{
const u8 *saddr = _saddr;
const u8 *daddr = _daddr;
- struct ieee802154_addr sa, da;
- struct ieee802154_mac_cb *cb = mac_cb_init(skb);
+ struct lowpan_addr_info *info;
/* TODO:
* if this package isn't ipv6 one, where should it be routed?
@@ -106,45 +120,21 @@ static int lowpan_header_create(struct sk_buff *skb,
raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
- lowpan_header_compress(skb, dev, type, daddr, saddr, len);
+ info = lowpan_skb_priv(skb);
- /* NOTE1: I'm still unsure about the fact that compression and WPAN
- * header are created here and not later in the xmit. So wait for
- * an opinion of net maintainers.
- */
- /* NOTE2: to be absolutely correct, we must derive PANid information
- * from MAC subif of the 'dev' and 'real_dev' network devices, but
- * this isn't implemented in mainline yet, so currently we assign 0xff
- */
- cb->type = IEEE802154_FC_TYPE_DATA;
+ /* TODO: Currently we only support extended_addr */
+ info->daddr.mode = IEEE802154_ADDR_LONG;
+ memcpy(&info->daddr.u.extended_addr, daddr,
+ sizeof(info->daddr.u.extended_addr));
+ info->saddr.mode = IEEE802154_ADDR_LONG;
+ memcpy(&info->saddr.u.extended_addr, saddr,
+ sizeof(info->daddr.u.extended_addr));
- /* prepare wpan address data */
- sa.mode = IEEE802154_ADDR_LONG;
- sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
- sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
-
- /* intra-PAN communications */
- da.pan_id = sa.pan_id;
-
- /* if the destination address is the broadcast address, use the
- * corresponding short address
- */
- if (lowpan_is_addr_broadcast(daddr)) {
- da.mode = IEEE802154_ADDR_SHORT;
- da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
- } else {
- da.mode = IEEE802154_ADDR_LONG;
- da.extended_addr = ieee802154_devaddr_from_raw(daddr);
- }
-
- cb->ackreq = !lowpan_is_addr_broadcast(daddr);
-
- return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
- type, (void *)&da, (void *)&sa, 0);
+ return 0;
}
static int lowpan_give_skb_to_devices(struct sk_buff *skb,
- struct net_device *dev)
+ struct net_device *dev)
{
struct lowpan_dev_record *entry;
struct sk_buff *skb_cp;
@@ -246,7 +236,7 @@ lowpan_alloc_frag(struct sk_buff *skb, int size,
return ERR_PTR(-rc);
}
} else {
- frag = ERR_PTR(ENOMEM);
+ frag = ERR_PTR(-ENOMEM);
}
return frag;
@@ -338,13 +328,68 @@ err:
return rc;
}
+static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ieee802154_addr sa, da;
+ struct ieee802154_mac_cb *cb = mac_cb_init(skb);
+ struct lowpan_addr_info info;
+ void *daddr, *saddr;
+
+ memcpy(&info, lowpan_skb_priv(skb), sizeof(info));
+
+ /* TODO: Currently we only support extended_addr */
+ daddr = &info.daddr.u.extended_addr;
+ saddr = &info.saddr.u.extended_addr;
+
+ lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len);
+
+ cb->type = IEEE802154_FC_TYPE_DATA;
+
+ /* prepare wpan address data */
+ sa.mode = IEEE802154_ADDR_LONG;
+ sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
+
+ /* intra-PAN communications */
+ da.pan_id = sa.pan_id;
+
+ /* if the destination address is the broadcast address, use the
+ * corresponding short address
+ */
+ if (lowpan_is_addr_broadcast((const u8 *)daddr)) {
+ da.mode = IEEE802154_ADDR_SHORT;
+ da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+ cb->ackreq = false;
+ } else {
+ da.mode = IEEE802154_ADDR_LONG;
+ da.extended_addr = ieee802154_devaddr_from_raw(daddr);
+ cb->ackreq = true;
+ }
+
+ return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
+ ETH_P_IPV6, (void *)&da, (void *)&sa, 0);
+}
+
static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ieee802154_hdr wpan_hdr;
- int max_single;
+ int max_single, ret;
pr_debug("package xmit\n");
+ /* We must take a copy of the skb before we modify/replace the ipv6
+ * header as the header could be used elsewhere
+ */
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_XMIT_DROP;
+
+ ret = lowpan_header(skb, dev);
+ if (ret < 0) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+
if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) {
kfree_skb(skb);
return NET_XMIT_DROP;
@@ -368,24 +413,28 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
+
return ieee802154_mlme_ops(real_dev)->get_phy(real_dev);
}
static __le16 lowpan_get_pan_id(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
+
return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev);
}
static __le16 lowpan_get_short_addr(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
+
return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
}
static u8 lowpan_get_dsn(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
+
return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev);
}
@@ -433,7 +482,7 @@ static void lowpan_setup(struct net_device *dev)
/* Frame Control + Sequence Number + Address fields + Security Header */
dev->hard_header_len = 2 + 1 + 20 + 14;
dev->needed_tailroom = 2; /* FCS */
- dev->mtu = 1281;
+ dev->mtu = IPV6_MIN_MTU;
dev->tx_queue_len = 0;
dev->flags = IFF_BROADCAST | IFF_MULTICAST;
dev->watchdog_timeo = 0;
@@ -454,7 +503,7 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
}
static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+ struct packet_type *pt, struct net_device *orig_dev)
{
struct ieee802154_hdr hdr;
int ret;
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 8af1330b3137..c0d4154d144f 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -12,13 +12,6 @@ config IEEE802154
config IEEE802154_6LOWPAN
tristate "6lowpan support over IEEE 802.15.4"
- depends on IEEE802154 && IPV6
- select 6LOWPAN_IPHC
+ depends on IEEE802154 && 6LOWPAN
---help---
IPv6 compression over IEEE 802.15.4.
-
-config 6LOWPAN_IPHC
- tristate
- ---help---
- 6lowpan compression code which is shared between IEEE 802.15.4 and Bluetooth
- stacks.
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index bf1b51497a41..3914b1ed4274 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,8 +1,7 @@
obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
-obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o
-obj-$(CONFIG_6LOWPAN_IPHC) += 6lowpan_iphc.o
+obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o
-6lowpan-y := 6lowpan_rtnl.o reassembly.o
+ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o
ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o \
header_ops.o
af_802154-y := af_ieee802154.o raw.o dgram.o
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 351d9a94ec2f..29e0de63001b 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -40,9 +40,7 @@
#include "af802154.h"
-/*
- * Utility function for families
- */
+/* Utility function for families */
struct net_device*
ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
{
@@ -87,8 +85,8 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
rtnl_unlock();
break;
default:
- pr_warning("Unsupported ieee802154 address type: %d\n",
- addr->mode);
+ pr_warn("Unsupported ieee802154 address type: %d\n",
+ addr->mode);
break;
}
@@ -106,7 +104,7 @@ static int ieee802154_sock_release(struct socket *sock)
return 0;
}
static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+ struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
@@ -114,7 +112,7 @@ static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
}
static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr,
- int addr_len)
+ int addr_len)
{
struct sock *sk = sock->sk;
@@ -125,7 +123,7 @@ static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr,
}
static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr,
- int addr_len, int flags)
+ int addr_len, int flags)
{
struct sock *sk = sock->sk;
@@ -139,7 +137,7 @@ static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr,
}
static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
- unsigned int cmd)
+ unsigned int cmd)
{
struct ifreq ifr;
int ret = -ENOIOCTLCMD;
@@ -167,7 +165,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
}
static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
- unsigned long arg)
+ unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -238,8 +236,7 @@ static const struct proto_ops ieee802154_dgram_ops = {
};
-/*
- * Create a socket. Initialise the socket, blank the addresses
+/* Create a socket. Initialise the socket, blank the addresses
* set the state.
*/
static int ieee802154_create(struct net *net, struct socket *sock,
@@ -301,13 +298,14 @@ static const struct net_proto_family ieee802154_family_ops = {
};
static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+ struct packet_type *pt, struct net_device *orig_dev)
{
if (!netif_running(dev))
goto drop;
pr_debug("got frame, type %d, dev %p\n", dev->type, dev);
#ifdef DEBUG
- print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len);
+ print_hex_dump_bytes("ieee802154_rcv ",
+ DUMP_PREFIX_NONE, skb->data, skb->len);
#endif
if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 4f0ed8780194..ef2ad8aaef13 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -149,8 +149,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL) {
- /*
- * We will only return the amount
+ /* We will only return the amount
* of this packet since that is all
* that will be read.
*/
@@ -161,12 +160,13 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
}
+
return -ENOIOCTLCMD;
}
/* FIXME: autobind */
static int dgram_connect(struct sock *sk, struct sockaddr *uaddr,
- int len)
+ int len)
{
struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
struct dgram_sock *ro = dgram_sk(sk);
@@ -205,7 +205,7 @@ static int dgram_disconnect(struct sock *sk, int flags)
}
static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size)
+ struct msghdr *msg, size_t size)
{
struct net_device *dev;
unsigned int mtu;
@@ -248,8 +248,8 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
skb = sock_alloc_send_skb(sk, hlen + tlen + size,
- msg->msg_flags & MSG_DONTWAIT,
- &err);
+ msg->msg_flags & MSG_DONTWAIT,
+ &err);
if (!skb)
goto out_dev;
@@ -262,7 +262,8 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
cb->ackreq = ro->want_ack;
if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name);
+ DECLARE_SOCKADDR(struct sockaddr_ieee802154*,
+ daddr, msg->msg_name);
ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
} else {
@@ -304,8 +305,8 @@ out:
}
static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int noblock, int flags,
- int *addr_len)
+ struct msghdr *msg, size_t len, int noblock,
+ int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
@@ -398,6 +399,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
dgram_sk(sk))) {
if (prev) {
struct sk_buff *clone;
+
clone = skb_clone(skb, GFP_ATOMIC);
if (clone)
dgram_rcv_skb(prev, clone);
@@ -407,9 +409,9 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
}
}
- if (prev)
+ if (prev) {
dgram_rcv_skb(prev, skb);
- else {
+ } else {
kfree_skb(skb);
ret = NET_RX_DROP;
}
@@ -419,7 +421,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
}
static int dgram_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen)
{
struct dgram_sock *ro = dgram_sk(sk);
@@ -463,7 +465,7 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname,
}
static int dgram_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ char __user *optval, unsigned int optlen)
{
struct dgram_sock *ro = dgram_sk(sk);
struct net *net = sock_net(sk);
diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index 8b83a231299e..5d352f86979e 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h
@@ -43,7 +43,7 @@ struct genl_info;
struct sk_buff *ieee802154_nl_create(int flags, u8 req);
int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group);
struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info,
- int flags, u8 req);
+ int flags, u8 req);
int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info);
extern struct genl_family nl802154_family;
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 26efcf4fd2ff..9222966f5e6d 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -52,7 +52,7 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req)
spin_lock_irqsave(&ieee802154_seq_lock, f);
hdr = genlmsg_put(msg, 0, ieee802154_seq_num++,
- &nl802154_family, flags, req);
+ &nl802154_family, flags, req);
spin_unlock_irqrestore(&ieee802154_seq_lock, f);
if (!hdr) {
nlmsg_free(msg);
@@ -86,7 +86,7 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info,
return NULL;
hdr = genlmsg_put_reply(msg, info,
- &nl802154_family, flags, req);
+ &nl802154_family, flags, req);
if (!hdr) {
nlmsg_free(msg);
return NULL;
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index a3281b8bfd5b..c6bfe22bfa5e 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -60,7 +60,8 @@ static __le16 nla_get_shortaddr(const struct nlattr *nla)
}
int ieee802154_nl_assoc_indic(struct net_device *dev,
- struct ieee802154_addr *addr, u8 cap)
+ struct ieee802154_addr *addr,
+ u8 cap)
{
struct sk_buff *msg;
@@ -93,7 +94,7 @@ nla_put_failure:
EXPORT_SYMBOL(ieee802154_nl_assoc_indic);
int ieee802154_nl_assoc_confirm(struct net_device *dev, __le16 short_addr,
- u8 status)
+ u8 status)
{
struct sk_buff *msg;
@@ -119,7 +120,8 @@ nla_put_failure:
EXPORT_SYMBOL(ieee802154_nl_assoc_confirm);
int ieee802154_nl_disassoc_indic(struct net_device *dev,
- struct ieee802154_addr *addr, u8 reason)
+ struct ieee802154_addr *addr,
+ u8 reason)
{
struct sk_buff *msg;
@@ -205,8 +207,9 @@ nla_put_failure:
EXPORT_SYMBOL(ieee802154_nl_beacon_indic);
int ieee802154_nl_scan_confirm(struct net_device *dev,
- u8 status, u8 scan_type, u32 unscanned, u8 page,
- u8 *edl/* , struct list_head *pan_desc_list */)
+ u8 status, u8 scan_type,
+ u32 unscanned, u8 page,
+ u8 *edl/* , struct list_head *pan_desc_list */)
{
struct sk_buff *msg;
@@ -260,7 +263,7 @@ nla_put_failure:
EXPORT_SYMBOL(ieee802154_nl_start_confirm);
static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
- u32 seq, int flags, struct net_device *dev)
+ u32 seq, int flags, struct net_device *dev)
{
void *hdr;
struct wpan_phy *phy;
@@ -270,7 +273,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
pr_debug("%s\n", __func__);
hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags,
- IEEE802154_LIST_IFACE);
+ IEEE802154_LIST_IFACE);
if (!hdr)
goto out;
@@ -330,14 +333,16 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info)
if (info->attrs[IEEE802154_ATTR_DEV_NAME]) {
char name[IFNAMSIZ + 1];
+
nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME],
- sizeof(name));
+ sizeof(name));
dev = dev_get_by_name(&init_net, name);
- } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX])
+ } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) {
dev = dev_get_by_index(&init_net,
nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX]));
- else
+ } else {
return NULL;
+ }
if (!dev)
return NULL;
@@ -435,7 +440,7 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info)
int ret = -EOPNOTSUPP;
if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] &&
- !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) ||
+ !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) ||
!info->attrs[IEEE802154_ATTR_REASON])
return -EINVAL;
@@ -464,8 +469,7 @@ out:
return ret;
}
-/*
- * PANid, channel, beacon_order = 15, superframe_order = 15,
+/* PANid, channel, beacon_order = 15, superframe_order = 15,
* PAN_coordinator, battery_life_extension = 0,
* coord_realignment = 0, security_enable = 0
*/
@@ -559,8 +563,8 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
page = 0;
- ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page,
- duration);
+ ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels,
+ page, duration);
out:
dev_put(dev);
@@ -570,7 +574,8 @@ out:
int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info)
{
/* Request for interface name, index, type, IEEE address,
- PAN Id, short address */
+ * PAN Id, short address
+ */
struct sk_buff *msg;
struct net_device *dev = NULL;
int rc = -ENOBUFS;
@@ -586,7 +591,7 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info)
goto out_dev;
rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq,
- 0, dev);
+ 0, dev);
if (rc < 0)
goto out_free;
@@ -598,7 +603,6 @@ out_free:
out_dev:
dev_put(dev);
return rc;
-
}
int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb)
@@ -616,7 +620,8 @@ int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb)
goto cont;
if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0)
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, dev) < 0)
break;
cont:
idx++;
@@ -765,6 +770,7 @@ ieee802154_llsec_parse_key_id(struct genl_info *info,
case IEEE802154_SCF_KEY_SHORT_INDEX:
{
u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]);
+
desc->short_source = cpu_to_le32(source);
break;
}
@@ -842,7 +848,7 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
goto out_dev;
hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0,
- IEEE802154_LLSEC_GETPARAMS);
+ IEEE802154_LLSEC_GETPARAMS);
if (!hdr)
goto out_free;
@@ -946,7 +952,7 @@ struct llsec_dump_data {
static int
ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb,
- int (*step)(struct llsec_dump_data*))
+ int (*step)(struct llsec_dump_data *))
{
struct net *net = sock_net(skb->sk);
struct net_device *dev;
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 89b265aea151..972baf83411a 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -36,7 +36,7 @@
#include "ieee802154.h"
static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
- u32 seq, int flags, struct wpan_phy *phy)
+ u32 seq, int flags, struct wpan_phy *phy)
{
void *hdr;
int i, pages = 0;
@@ -48,7 +48,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
return -EMSGSIZE;
hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags,
- IEEE802154_LIST_PHY);
+ IEEE802154_LIST_PHY);
if (!hdr)
goto out;
@@ -80,7 +80,8 @@ out:
int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info)
{
/* Request for interface name, index, type, IEEE address,
- PAN Id, short address */
+ * PAN Id, short address
+ */
struct sk_buff *msg;
struct wpan_phy *phy;
const char *name;
@@ -105,7 +106,7 @@ int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info)
goto out_dev;
rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq,
- 0, phy);
+ 0, phy);
if (rc < 0)
goto out_free;
@@ -117,7 +118,6 @@ out_free:
out_dev:
wpan_phy_put(phy);
return rc;
-
}
struct dump_phy_data {
@@ -137,10 +137,10 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data)
return 0;
rc = ieee802154_nl_fill_phy(data->skb,
- NETLINK_CB(data->cb->skb).portid,
- data->cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- phy);
+ NETLINK_CB(data->cb->skb).portid,
+ data->cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ phy);
if (rc < 0) {
data->idx--;
@@ -238,10 +238,9 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
addr.sa_family = ARPHRD_IEEE802154;
nla_memcpy(&addr.sa_data, info->attrs[IEEE802154_ATTR_HW_ADDR],
- IEEE802154_ADDR_LEN);
+ IEEE802154_ADDR_LEN);
- /*
- * strangely enough, some callbacks (inetdev_event) from
+ /* strangely enough, some callbacks (inetdev_event) from
* dev_set_mac_address require RTNL_LOCK
*/
rtnl_lock();
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 74d54fae33d7..9d1f64806f02 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -96,7 +96,7 @@ out:
}
static int raw_connect(struct sock *sk, struct sockaddr *uaddr,
- int addr_len)
+ int addr_len)
{
return -ENOTSUPP;
}
@@ -106,8 +106,8 @@ static int raw_disconnect(struct sock *sk, int flags)
return 0;
}
-static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t size)
+static int raw_sendmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t size)
{
struct net_device *dev;
unsigned int mtu;
@@ -145,7 +145,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
skb = sock_alloc_send_skb(sk, hlen + tlen + size,
- msg->msg_flags & MSG_DONTWAIT, &err);
+ msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
goto out_dev;
@@ -235,7 +235,6 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb)
bh_lock_sock(sk);
if (!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == dev->ifindex) {
-
struct sk_buff *clone;
clone = skb_clone(skb, GFP_ATOMIC);
@@ -248,13 +247,13 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb)
}
static int raw_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen)
{
return -EOPNOTSUPP;
}
static int raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ char __user *optval, unsigned int optlen)
{
return -EOPNOTSUPP;
}
@@ -274,4 +273,3 @@ struct proto ieee802154_raw_prot = {
.getsockopt = raw_getsockopt,
.setsockopt = raw_setsockopt,
};
-
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index 6f1428c4870b..7cfcd6885225 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -30,6 +30,8 @@
#include "reassembly.h"
+static const char lowpan_frags_cache_name[] = "lowpan-frags";
+
struct lowpan_frag_info {
__be16 d_tag;
u16 d_size;
@@ -50,29 +52,25 @@ static unsigned int lowpan_hash_frag(__be16 tag, u16 d_size,
const struct ieee802154_addr *saddr,
const struct ieee802154_addr *daddr)
{
- u32 c;
-
net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
- c = jhash_3words(ieee802154_addr_hash(saddr),
- ieee802154_addr_hash(daddr),
- (__force u32)(tag + (d_size << 16)),
- lowpan_frags.rnd);
-
- return c & (INETFRAGS_HASHSZ - 1);
+ return jhash_3words(ieee802154_addr_hash(saddr),
+ ieee802154_addr_hash(daddr),
+ (__force u32)(tag + (d_size << 16)),
+ lowpan_frags.rnd);
}
-static unsigned int lowpan_hashfn(struct inet_frag_queue *q)
+static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
{
- struct lowpan_frag_queue *fq;
+ const struct lowpan_frag_queue *fq;
fq = container_of(q, struct lowpan_frag_queue, q);
return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
}
-static bool lowpan_frag_match(struct inet_frag_queue *q, void *a)
+static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
{
- struct lowpan_frag_queue *fq;
- struct lowpan_create_arg *arg = a;
+ const struct lowpan_frag_queue *fq;
+ const struct lowpan_create_arg *arg = a;
fq = container_of(q, struct lowpan_frag_queue, q);
return fq->tag == arg->tag && fq->d_size == arg->d_size &&
@@ -80,10 +78,10 @@ static bool lowpan_frag_match(struct inet_frag_queue *q, void *a)
ieee802154_addr_equal(&fq->daddr, arg->dst);
}
-static void lowpan_frag_init(struct inet_frag_queue *q, void *a)
+static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
+ const struct lowpan_create_arg *arg = a;
struct lowpan_frag_queue *fq;
- struct lowpan_create_arg *arg = a;
fq = container_of(q, struct lowpan_frag_queue, q);
@@ -103,7 +101,7 @@ static void lowpan_frag_expire(unsigned long data)
spin_lock(&fq->q.lock);
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q, &lowpan_frags);
@@ -128,7 +126,6 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
arg.src = src;
arg.dst = dst;
- read_lock(&lowpan_frags.lock);
hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
q = inet_frag_find(&ieee802154_lowpan->frags,
@@ -147,7 +144,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
struct net_device *dev;
int end, offset;
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
offset = lowpan_cb(skb)->d_offset << 3;
@@ -159,14 +156,14 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
+ ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
goto err;
- fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & INET_FRAG_LAST_IN)
+ if (fq->q.flags & INET_FRAG_LAST_IN)
goto err;
fq->q.len = end;
}
@@ -206,13 +203,13 @@ found:
if (frag_type == LOWPAN_DISPATCH_FRAG1) {
/* Calculate uncomp. 6lowpan header to estimate full size */
fq->q.meat += lowpan_uncompress_size(skb, NULL);
- fq->q.last_in |= INET_FRAG_FIRST_IN;
+ fq->q.flags |= INET_FRAG_FIRST_IN;
} else {
fq->q.meat += skb->len;
}
add_frag_mem_limit(&fq->q, skb->truesize);
- if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
int res;
unsigned long orefdst = skb->_skb_refdst;
@@ -223,7 +220,6 @@ found:
return res;
}
- inet_frag_lru_move(&fq->q);
return -1;
err:
kfree_skb(skb);
@@ -359,8 +355,6 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
struct net *net = dev_net(skb->dev);
struct lowpan_frag_info *frag_info = lowpan_cb(skb);
struct ieee802154_addr source, dest;
- struct netns_ieee802154_lowpan *ieee802154_lowpan =
- net_ieee802154_lowpan(net);
int err;
source = mac_cb(skb)->source;
@@ -370,14 +364,15 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
if (err < 0)
goto err;
- if (frag_info->d_size > ieee802154_lowpan->max_dsize)
+ if (frag_info->d_size > IPV6_MIN_MTU) {
+ net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n");
goto err;
-
- inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false);
+ }
fq = fq_find(net, frag_info, &source, &dest);
if (fq != NULL) {
int ret;
+
spin_lock(&fq->q.lock);
ret = lowpan_frag_queue(fq, skb, frag_type);
spin_unlock(&fq->q.lock);
@@ -393,20 +388,25 @@ err:
EXPORT_SYMBOL(lowpan_frag_rcv);
#ifdef CONFIG_SYSCTL
+static int zero;
+
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
.procname = "6lowpanfrag_time",
@@ -415,20 +415,15 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
- .procname = "6lowpanfrag_max_datagram_size",
- .data = &init_net.ieee802154_lowpan.max_dsize,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
{ }
};
+/* secret interval has been deprecated */
+static int lowpan_frags_secret_interval_unused;
static struct ctl_table lowpan_frags_ctl_table[] = {
{
.procname = "6lowpanfrag_secret_interval",
- .data = &lowpan_frags.secret_interval,
+ .data = &lowpan_frags_secret_interval_unused,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -451,9 +446,11 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
table[0].data = &ieee802154_lowpan->frags.high_thresh;
+ table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
+ table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh;
table[1].data = &ieee802154_lowpan->frags.low_thresh;
+ table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
table[2].data = &ieee802154_lowpan->frags.timeout;
- table[3].data = &ieee802154_lowpan->max_dsize;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -488,7 +485,7 @@ static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
static struct ctl_table_header *lowpan_ctl_header;
-static int lowpan_frags_sysctl_register(void)
+static int __init lowpan_frags_sysctl_register(void)
{
lowpan_ctl_header = register_net_sysctl(&init_net,
"net/ieee802154/6lowpan",
@@ -510,7 +507,7 @@ static inline void lowpan_frags_ns_sysctl_unregister(struct net *net)
{
}
-static inline int lowpan_frags_sysctl_register(void)
+static inline int __init lowpan_frags_sysctl_register(void)
{
return 0;
}
@@ -528,7 +525,6 @@ static int __net_init lowpan_frags_init_net(struct net *net)
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
- ieee802154_lowpan->max_dsize = 0xFFFF;
inet_frags_init_net(&ieee802154_lowpan->frags);
@@ -568,8 +564,10 @@ int __init lowpan_net_frag_init(void)
lowpan_frags.qsize = sizeof(struct frag_queue);
lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
- lowpan_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&lowpan_frags);
+ lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ ret = inet_frags_init(&lowpan_frags);
+ if (ret)
+ goto err_pernet;
return ret;
err_pernet:
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 8d6f6704da84..4955e0fe5883 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -48,7 +48,8 @@ MASTER_SHOW(transmit_power, "%d +- 1 dB");
MASTER_SHOW(cca_mode, "%d");
static ssize_t channels_supported_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev);
int ret;
@@ -57,7 +58,7 @@ static ssize_t channels_supported_show(struct device *dev,
mutex_lock(&phy->pib_lock);
for (i = 0; i < 32; i++) {
ret = snprintf(buf + len, PAGE_SIZE - len,
- "%#09x\n", phy->channels_supported[i]);
+ "%#09x\n", phy->channels_supported[i]);
if (ret < 0)
break;
len += ret;
@@ -80,6 +81,7 @@ ATTRIBUTE_GROUPS(pmib);
static void wpan_phy_release(struct device *d)
{
struct wpan_phy *phy = container_of(d, struct wpan_phy, dev);
+
kfree(phy);
}
@@ -121,11 +123,12 @@ static int wpan_phy_iter(struct device *dev, void *_data)
{
struct wpan_phy_iter_data *wpid = _data;
struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev);
+
return wpid->fn(phy, wpid->data);
}
int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data),
- void *data)
+ void *data)
{
struct wpan_phy_iter_data wpid = {
.fn = fn,
@@ -197,6 +200,7 @@ EXPORT_SYMBOL(wpan_phy_free);
static int __init wpan_phy_class_init(void)
{
int rc;
+
rc = class_register(&wpan_phy_class);
if (rc)
goto err;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 05c57f0fcabe..e682b48e0709 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -307,6 +307,35 @@ config NET_IPVTI
the notion of a secure tunnel for IPSEC and then use routing protocol
on top.
+config NET_UDP_TUNNEL
+ tristate
+ select NET_IP_TUNNEL
+ default n
+
+config NET_FOU
+ tristate "IP: Foo (IP protocols) over UDP"
+ select XFRM
+ select NET_UDP_TUNNEL
+ ---help---
+ Foo over UDP allows any IP protocol to be directly encapsulated
+ over UDP include tunnels (IPIP, GRE, SIT). By encapsulating in UDP
+ network mechanisms and optimizations for UDP (such as ECMP
+ and RSS) can be leveraged to provide better service.
+
+config GENEVE
+ tristate "Generic Network Virtualization Encapsulation (Geneve)"
+ depends on INET
+ select NET_UDP_TUNNEL
+ ---help---
+ This allows one to create Geneve virtual interfaces that provide
+ Layer 2 Networks over Layer 3 Networks. Geneve is often used
+ to tunnel virtual network infrastructure in virtualized environments.
+ For more information see:
+ http://tools.ietf.org/html/draft-gross-geneve-01
+
+ To compile this driver as a module, choose M here: the module
+
+
config INET_AH
tristate "IP: AH transformation"
select XFRM_ALGO
@@ -556,6 +585,27 @@ config TCP_CONG_ILLINOIS
For further details see:
http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+config TCP_CONG_DCTCP
+ tristate "DataCenter TCP (DCTCP)"
+ default n
+ ---help---
+ DCTCP leverages Explicit Congestion Notification (ECN) in the network to
+ provide multi-bit feedback to the end hosts. It is designed to provide:
+
+ - High burst tolerance (incast due to partition/aggregate),
+ - Low latency (short flows, queries),
+ - High throughput (continuous data updates, large file transfers) with
+ commodity, shallow-buffered switches.
+
+ All switches in the data center network running DCTCP must support
+ ECN marking and be configured for marking when reaching defined switch
+ buffer thresholds. The default ECN marking threshold heuristic for
+ DCTCP on switches is 20 packets (30KB) at 1Gbps, and 65 packets
+ (~100KB) at 10Gbps, but might need further careful tweaking.
+
+ For further details see:
+ http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+
choice
prompt "Default TCP congestion control"
default DEFAULT_CUBIC
@@ -584,9 +634,11 @@ choice
config DEFAULT_WESTWOOD
bool "Westwood" if TCP_CONG_WESTWOOD=y
+ config DEFAULT_DCTCP
+ bool "DCTCP" if TCP_CONG_DCTCP=y
+
config DEFAULT_RENO
bool "Reno"
-
endchoice
endif
@@ -606,6 +658,7 @@ config DEFAULT_TCP_CONG
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
+ default "dctcp" if DEFAULT_DCTCP
default "cubic"
config TCP_MD5SIG
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f032688d20d3..518c04ed666e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,8 +20,10 @@ obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
+obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
obj-$(CONFIG_NET_IPVTI) += ip_vti.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_AH) += ah4.o
@@ -41,6 +43,7 @@ obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
+obj-$(CONFIG_TCP_CONG_DCTCP) += tcp_dctcp.o
obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
@@ -53,6 +56,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_GENEVE) += geneve.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d156b3c5f363..92db7a69f2b9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -418,10 +418,6 @@ int inet_release(struct socket *sock)
}
EXPORT_SYMBOL(inet_release);
-/* It is off by default, see below. */
-int sysctl_ip_nonlocal_bind __read_mostly;
-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
-
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
@@ -461,7 +457,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* is temporarily down)
*/
err = -EADDRNOTAVAIL;
- if (!sysctl_ip_nonlocal_bind &&
+ if (!net->ipv4.sysctl_ip_nonlocal_bind &&
!(inet->freebind || inet->transparent) &&
addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
@@ -1201,40 +1197,6 @@ int inet_sk_rebuild_header(struct sock *sk)
}
EXPORT_SYMBOL(inet_sk_rebuild_header);
-static int inet_gso_send_check(struct sk_buff *skb)
-{
- const struct net_offload *ops;
- const struct iphdr *iph;
- int proto;
- int ihl;
- int err = -EINVAL;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
- goto out;
-
- iph = ip_hdr(skb);
- ihl = iph->ihl * 4;
- if (ihl < sizeof(*iph))
- goto out;
-
- proto = iph->protocol;
-
- /* Warning: after this point, iph might be no longer valid */
- if (unlikely(!pskb_may_pull(skb, ihl)))
- goto out;
- __skb_pull(skb, ihl);
-
- skb_reset_transport_header(skb);
- err = -EPROTONOSUPPORT;
-
- ops = rcu_dereference(inet_offloads[proto]);
- if (likely(ops && ops->callbacks.gso_send_check))
- err = ops->callbacks.gso_send_check(skb);
-
-out:
- return err;
-}
-
static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -1407,6 +1369,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
* immediately following this IP hdr.
*/
+ /* Note : No need to call skb_gro_postpull_rcsum() here,
+ * as we already checked checksum over ipv4 header was 0
+ */
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
@@ -1659,7 +1624,6 @@ static int ipv4_proc_init(void);
static struct packet_offload ip_packet_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.callbacks = {
- .gso_send_check = inet_gso_send_check,
.gso_segment = inet_gso_segment,
.gro_receive = inet_gro_receive,
.gro_complete = inet_gro_complete,
@@ -1668,8 +1632,9 @@ static struct packet_offload ip_packet_offload __read_mostly = {
static const struct net_offload ipip_offload = {
.callbacks = {
- .gso_send_check = inet_gso_send_check,
.gso_segment = inet_gso_segment,
+ .gro_receive = inet_gro_receive,
+ .gro_complete = inet_gro_complete,
},
};
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a2afa89513a0..ac9a32ec3ee4 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -505,8 +505,6 @@ static int ah_init_state(struct xfrm_state *x)
ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
ahp->icv_trunc_len = x->aalg->alg_trunc_len/8;
- BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
-
if (x->props.flags & XFRM_STATE_ALIGN4)
x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) +
ahp->icv_trunc_len);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1a9b99e04465..16acb59d665e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -953,10 +953,11 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
{
const struct arphdr *arp;
+ /* do not tweak dropwatch on an ARP we will ignore */
if (dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
skb->pkt_type == PACKET_LOOPBACK)
- goto freeskb;
+ goto consumeskb;
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
@@ -974,6 +975,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+consumeskb:
+ consume_skb(skb);
+ return 0;
freeskb:
kfree_skb(skb);
out_of_mem:
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 05b708bbdb0d..4715f25dfe03 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -246,7 +246,7 @@ static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
* success, negative values on error.
*
*/
-static int cipso_v4_cache_init(void)
+static int __init cipso_v4_cache_init(void)
{
u32 iter;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index a3095fdefbed..90c0e8386116 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
sk->sk_state = TCP_ESTABLISHED;
+ inet_set_txhash(sk);
inet->inet_id = jiffies;
sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e9449376b58e..214882e7d6de 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -180,11 +180,12 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
int destroy);
#ifdef CONFIG_SYSCTL
-static void devinet_sysctl_register(struct in_device *idev);
+static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
-static void devinet_sysctl_register(struct in_device *idev)
+static int devinet_sysctl_register(struct in_device *idev)
{
+ return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
@@ -232,6 +233,7 @@ EXPORT_SYMBOL(in_dev_finish_destroy);
static struct in_device *inetdev_init(struct net_device *dev)
{
struct in_device *in_dev;
+ int err = -ENOMEM;
ASSERT_RTNL();
@@ -252,7 +254,13 @@ static struct in_device *inetdev_init(struct net_device *dev)
/* Account for reference dev->ip_ptr (below) */
in_dev_hold(in_dev);
- devinet_sysctl_register(in_dev);
+ err = devinet_sysctl_register(in_dev);
+ if (err) {
+ in_dev->dead = 1;
+ in_dev_put(in_dev);
+ in_dev = NULL;
+ goto out;
+ }
ip_mc_init_dev(in_dev);
if (dev->flags & IFF_UP)
ip_mc_up(in_dev);
@@ -260,7 +268,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
/* we can receive as soon as ip_ptr is set -- do this last */
rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
- return in_dev;
+ return in_dev ?: ERR_PTR(err);
out_kfree:
kfree(in_dev);
in_dev = NULL;
@@ -1347,8 +1355,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (!in_dev) {
if (event == NETDEV_REGISTER) {
in_dev = inetdev_init(dev);
- if (!in_dev)
- return notifier_from_errno(-ENOMEM);
+ if (IS_ERR(in_dev))
+ return notifier_from_errno(PTR_ERR(in_dev));
if (dev->flags & IFF_LOOPBACK) {
IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
@@ -2182,11 +2190,21 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
kfree(t);
}
-static void devinet_sysctl_register(struct in_device *idev)
+static int devinet_sysctl_register(struct in_device *idev)
{
- neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
- __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
+ int err;
+
+ if (!sysctl_dev_name_is_allowed(idev->dev->name))
+ return -EINVAL;
+
+ err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
+ if (err)
+ return err;
+ err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
&idev->cnf);
+ if (err)
+ neigh_sysctl_unregister(idev->arp_parms);
+ return err;
}
static void devinet_sysctl_unregister(struct in_device *idev)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 255aa9946fe7..23104a3f2924 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -243,7 +243,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
u8 tos, int oif, struct net_device *dev,
int rpf, struct in_device *idev, u32 *itag)
{
- int ret, no_addr, accept_local;
+ int ret, no_addr;
struct fib_result res;
struct flowi4 fl4;
struct net *net;
@@ -258,16 +258,17 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
no_addr = idev->ifa_list == NULL;
- accept_local = IN_DEV_ACCEPT_LOCAL(idev);
fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
net = dev_net(dev);
if (fib_lookup(net, &fl4, &res))
goto last_resort;
- if (res.type != RTN_UNICAST) {
- if (res.type != RTN_LOCAL || !accept_local)
- goto e_inval;
- }
+ if (res.type != RTN_UNICAST &&
+ (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
+ goto e_inval;
+ if (!rpf && !fib_num_tclassid_users(dev_net(dev)) &&
+ (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
+ goto last_resort;
fib_combine_itag(itag, &res);
dev_match = false;
@@ -321,6 +322,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
+ IN_DEV_ACCEPT_LOCAL(idev) &&
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
*itag = 0;
return 0;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b10cd43a4722..5b6efb3d2308 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -157,9 +157,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp)
static void free_nh_exceptions(struct fib_nh *nh)
{
- struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+ struct fnhe_hash_bucket *hash;
int i;
+ hash = rcu_dereference_protected(nh->nh_exceptions, 1);
+ if (!hash)
+ return;
for (i = 0; i < FNHE_HASH_SIZE; i++) {
struct fib_nh_exception *fnhe;
@@ -205,8 +208,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
dev_put(nexthop_nh->nh_dev);
- if (nexthop_nh->nh_exceptions)
- free_nh_exceptions(nexthop_nh);
+ free_nh_exceptions(nexthop_nh);
rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5afeb5aa4c7c..e9cb2588e416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
last = li;
}
if (last)
- hlist_add_after_rcu(&last->hlist, &new->hlist);
+ hlist_add_behind_rcu(&new->hlist, &last->hlist);
else
hlist_add_before_rcu(&new->hlist, &li->hlist);
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
new file mode 100644
index 000000000000..efa70ad44906
--- /dev/null
+++ b/net/ipv4/fou.c
@@ -0,0 +1,514 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <net/genetlink.h>
+#include <net/gue.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/xfrm.h>
+#include <uapi/linux/fou.h>
+#include <uapi/linux/genetlink.h>
+
+static DEFINE_SPINLOCK(fou_lock);
+static LIST_HEAD(fou_list);
+
+struct fou {
+ struct socket *sock;
+ u8 protocol;
+ u16 port;
+ struct udp_offload udp_offloads;
+ struct list_head list;
+};
+
+struct fou_cfg {
+ u16 type;
+ u8 protocol;
+ struct udp_port_cfg udp_config;
+};
+
+static inline struct fou *fou_from_sock(struct sock *sk)
+{
+ return sk->sk_user_data;
+}
+
+static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
+ u8 protocol, size_t len)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ /* Remove 'len' bytes from the packet (UDP header and
+ * FOU header if present), modify the protocol to the one
+ * we found, and then call rcv_encap.
+ */
+ iph->tot_len = htons(ntohs(iph->tot_len) - len);
+ __skb_pull(skb, len);
+ skb_postpull_rcsum(skb, udp_hdr(skb), len);
+ skb_reset_transport_header(skb);
+
+ return -protocol;
+}
+
+static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct fou *fou = fou_from_sock(sk);
+
+ if (!fou)
+ return 1;
+
+ return fou_udp_encap_recv_deliver(skb, fou->protocol,
+ sizeof(struct udphdr));
+}
+
+static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct fou *fou = fou_from_sock(sk);
+ size_t len;
+ struct guehdr *guehdr;
+ struct udphdr *uh;
+
+ if (!fou)
+ return 1;
+
+ len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ uh = udp_hdr(skb);
+ guehdr = (struct guehdr *)&uh[1];
+
+ len += guehdr->hlen << 2;
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ if (guehdr->version != 0)
+ goto drop;
+
+ if (guehdr->flags) {
+ /* No support yet */
+ goto drop;
+ }
+
+ return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static struct sk_buff **fou_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ u8 proto = NAPI_GRO_CB(skb)->proto;
+ const struct net_offload **offloads;
+
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out_unlock;
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return pp;
+}
+
+static int fou_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ const struct net_offload *ops;
+ u8 proto = NAPI_GRO_CB(skb)->proto;
+ int err = -ENOSYS;
+ const struct net_offload **offloads;
+
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct sk_buff **gue_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ u8 proto;
+ struct guehdr *guehdr;
+ unsigned int hlen, guehlen;
+ unsigned int off;
+ int flush = 1;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*guehdr);
+ guehdr = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
+
+ proto = guehdr->next_hdr;
+
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ hlen = off + guehlen;
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out_unlock;
+ }
+
+ flush = 0;
+
+ for (p = *head; p; p = p->next) {
+ const struct guehdr *guehdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ guehdr2 = (struct guehdr *)(p->data + off);
+
+ /* Compare base GUE header to be equal (covers
+ * hlen, version, next_hdr, and flags.
+ */
+ if (guehdr->word != guehdr2->word) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ /* Compare optional fields are the same. */
+ if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
+ guehdr->hlen << 2)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ }
+
+ skb_gro_pull(skb, guehlen);
+
+ /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
+ skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+
+static int gue_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ const struct net_offload **offloads;
+ struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+ const struct net_offload *ops;
+ unsigned int guehlen;
+ u8 proto;
+ int err = -ENOENT;
+
+ proto = guehdr->next_hdr;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
+
+out_unlock:
+ rcu_read_unlock();
+ return err;
+}
+
+static int fou_add_to_port_list(struct fou *fou)
+{
+ struct fou *fout;
+
+ spin_lock(&fou_lock);
+ list_for_each_entry(fout, &fou_list, list) {
+ if (fou->port == fout->port) {
+ spin_unlock(&fou_lock);
+ return -EALREADY;
+ }
+ }
+
+ list_add(&fou->list, &fou_list);
+ spin_unlock(&fou_lock);
+
+ return 0;
+}
+
+static void fou_release(struct fou *fou)
+{
+ struct socket *sock = fou->sock;
+ struct sock *sk = sock->sk;
+
+ udp_del_offload(&fou->udp_offloads);
+
+ list_del(&fou->list);
+
+ /* Remove hooks into tunnel socket */
+ sk->sk_user_data = NULL;
+
+ sock_release(sock);
+
+ kfree(fou);
+}
+
+static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = fou_udp_recv;
+ fou->protocol = cfg->protocol;
+ fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+ fou->udp_offloads.ipproto = cfg->protocol;
+
+ return 0;
+}
+
+static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = gue_udp_recv;
+ fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+
+ return 0;
+}
+
+static int fou_create(struct net *net, struct fou_cfg *cfg,
+ struct socket **sockp)
+{
+ struct fou *fou = NULL;
+ int err;
+ struct socket *sock = NULL;
+ struct sock *sk;
+
+ /* Open UDP socket */
+ err = udp_sock_create(net, &cfg->udp_config, &sock);
+ if (err < 0)
+ goto error;
+
+ /* Allocate FOU port structure */
+ fou = kzalloc(sizeof(*fou), GFP_KERNEL);
+ if (!fou) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ sk = sock->sk;
+
+ fou->port = cfg->udp_config.local_udp_port;
+
+ /* Initial for fou type */
+ switch (cfg->type) {
+ case FOU_ENCAP_DIRECT:
+ err = fou_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ case FOU_ENCAP_GUE:
+ err = gue_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ default:
+ err = -EINVAL;
+ goto error;
+ }
+
+ udp_sk(sk)->encap_type = 1;
+ udp_encap_enable();
+
+ sk->sk_user_data = fou;
+ fou->sock = sock;
+
+ udp_set_convert_csum(sk, true);
+
+ sk->sk_allocation = GFP_ATOMIC;
+
+ if (cfg->udp_config.family == AF_INET) {
+ err = udp_add_offload(&fou->udp_offloads);
+ if (err)
+ goto error;
+ }
+
+ err = fou_add_to_port_list(fou);
+ if (err)
+ goto error;
+
+ if (sockp)
+ *sockp = sock;
+
+ return 0;
+
+error:
+ kfree(fou);
+ if (sock)
+ sock_release(sock);
+
+ return err;
+}
+
+static int fou_destroy(struct net *net, struct fou_cfg *cfg)
+{
+ struct fou *fou;
+ u16 port = cfg->udp_config.local_udp_port;
+ int err = -EINVAL;
+
+ spin_lock(&fou_lock);
+ list_for_each_entry(fou, &fou_list, list) {
+ if (fou->port == port) {
+ udp_del_offload(&fou->udp_offloads);
+ fou_release(fou);
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&fou_lock);
+
+ return err;
+}
+
+static struct genl_family fou_nl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = FOU_GENL_NAME,
+ .version = FOU_GENL_VERSION,
+ .maxattr = FOU_ATTR_MAX,
+ .netnsok = true,
+};
+
+static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
+ [FOU_ATTR_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_AF] = { .type = NLA_U8, },
+ [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+ [FOU_ATTR_TYPE] = { .type = NLA_U8, },
+};
+
+static int parse_nl_config(struct genl_info *info,
+ struct fou_cfg *cfg)
+{
+ memset(cfg, 0, sizeof(*cfg));
+
+ cfg->udp_config.family = AF_INET;
+
+ if (info->attrs[FOU_ATTR_AF]) {
+ u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
+
+ if (family != AF_INET && family != AF_INET6)
+ return -EINVAL;
+
+ cfg->udp_config.family = family;
+ }
+
+ if (info->attrs[FOU_ATTR_PORT]) {
+ u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);
+
+ cfg->udp_config.local_udp_port = port;
+ }
+
+ if (info->attrs[FOU_ATTR_IPPROTO])
+ cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
+
+ if (info->attrs[FOU_ATTR_TYPE])
+ cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
+
+ return 0;
+}
+
+static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct fou_cfg cfg;
+ int err;
+
+ err = parse_nl_config(info, &cfg);
+ if (err)
+ return err;
+
+ return fou_create(&init_net, &cfg, NULL);
+}
+
+static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct fou_cfg cfg;
+
+ parse_nl_config(info, &cfg);
+
+ return fou_destroy(&init_net, &cfg);
+}
+
+static const struct genl_ops fou_nl_ops[] = {
+ {
+ .cmd = FOU_CMD_ADD,
+ .doit = fou_nl_cmd_add_port,
+ .policy = fou_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = FOU_CMD_DEL,
+ .doit = fou_nl_cmd_rm_port,
+ .policy = fou_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+static int __init fou_init(void)
+{
+ int ret;
+
+ ret = genl_register_family_with_ops(&fou_nl_family,
+ fou_nl_ops);
+
+ return ret;
+}
+
+static void __exit fou_fini(void)
+{
+ struct fou *fou, *next;
+
+ genl_unregister_family(&fou_nl_family);
+
+ /* Close all the FOU sockets */
+
+ spin_lock(&fou_lock);
+ list_for_each_entry_safe(fou, next, &fou_list, list)
+ fou_release(fou);
+ spin_unlock(&fou_lock);
+}
+
+module_init(fou_init);
+module_exit(fou_fini);
+MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
new file mode 100644
index 000000000000..065cd94c640c
--- /dev/null
+++ b/net/ipv4/geneve.c
@@ -0,0 +1,373 @@
+/*
+ * Geneve: Generic Network Virtualization Encapsulation
+ *
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/rculist.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/hash.h>
+#include <linux/ethtool.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/geneve.h>
+#include <net/protocol.h>
+#include <net/udp_tunnel.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
+
+/* per-network namespace private data for this module */
+struct geneve_net {
+ struct hlist_head sock_list[PORT_HASH_SIZE];
+ spinlock_t sock_lock; /* Protects sock_list */
+};
+
+static int geneve_net_id;
+
+static struct workqueue_struct *geneve_wq;
+
+static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
+{
+ return (struct genevehdr *)(udp_hdr(skb) + 1);
+}
+
+static struct hlist_head *gs_head(struct net *net, __be16 port)
+{
+ struct geneve_net *gn = net_generic(net, geneve_net_id);
+
+ return &gn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
+}
+
+/* Find geneve socket based on network namespace and UDP port */
+static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port)
+{
+ struct geneve_sock *gs;
+
+ hlist_for_each_entry_rcu(gs, gs_head(net, port), hlist) {
+ if (inet_sk(gs->sock->sk)->inet_sport == port)
+ return gs;
+ }
+
+ return NULL;
+}
+
+static void geneve_build_header(struct genevehdr *geneveh,
+ __be16 tun_flags, u8 vni[3],
+ u8 options_len, u8 *options)
+{
+ geneveh->ver = GENEVE_VER;
+ geneveh->opt_len = options_len / 4;
+ geneveh->oam = !!(tun_flags & TUNNEL_OAM);
+ geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
+ geneveh->rsvd1 = 0;
+ memcpy(geneveh->vni, vni, 3);
+ geneveh->proto_type = htons(ETH_P_TEB);
+ geneveh->rsvd2 = 0;
+
+ memcpy(geneveh->options, options, options_len);
+}
+
+/* Transmit a fully formated Geneve frame.
+ *
+ * When calling this function. The skb->data should point
+ * to the geneve header which is fully formed.
+ *
+ * This function will add other UDP tunnel headers.
+ */
+int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
+ struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
+ __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
+ __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
+ bool xnet)
+{
+ struct genevehdr *gnvh;
+ int min_headroom;
+ int err;
+
+ skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx);
+
+ min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+ err = skb_cow_head(skb, min_headroom);
+ if (unlikely(err))
+ return err;
+
+ if (vlan_tx_tag_present(skb)) {
+ if (unlikely(!__vlan_put_tag(skb,
+ skb->vlan_proto,
+ vlan_tx_tag_get(skb)))) {
+ err = -ENOMEM;
+ return err;
+ }
+ skb->vlan_tci = 0;
+ }
+
+ gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
+ geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
+
+ return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst,
+ tos, ttl, df, src_port, dst_port, xnet);
+}
+EXPORT_SYMBOL_GPL(geneve_xmit_skb);
+
+static void geneve_notify_add_rx_port(struct geneve_sock *gs)
+{
+ struct sock *sk = gs->sock->sk;
+ sa_family_t sa_family = sk->sk_family;
+ int err;
+
+ if (sa_family == AF_INET) {
+ err = udp_add_offload(&gs->udp_offloads);
+ if (err)
+ pr_warn("geneve: udp_add_offload failed with status %d\n",
+ err);
+ }
+}
+
+/* Callback from net/ipv4/udp.c to receive packets */
+static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct genevehdr *geneveh;
+ struct geneve_sock *gs;
+ int opts_len;
+
+ /* Need Geneve and inner Ethernet header to be present */
+ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
+ goto error;
+
+ /* Return packets with reserved bits set */
+ geneveh = geneve_hdr(skb);
+
+ if (unlikely(geneveh->ver != GENEVE_VER))
+ goto error;
+
+ if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
+ goto error;
+
+ opts_len = geneveh->opt_len * 4;
+ if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
+ htons(ETH_P_TEB)))
+ goto drop;
+
+ gs = rcu_dereference_sk_user_data(sk);
+ if (!gs)
+ goto drop;
+
+ gs->rcv(gs, skb);
+ return 0;
+
+drop:
+ /* Consume bad packet */
+ kfree_skb(skb);
+ return 0;
+
+error:
+ /* Let the UDP layer deal with the skb */
+ return 1;
+}
+
+static void geneve_del_work(struct work_struct *work)
+{
+ struct geneve_sock *gs = container_of(work, struct geneve_sock,
+ del_work);
+
+ udp_tunnel_sock_release(gs->sock);
+ kfree_rcu(gs, rcu);
+}
+
+static struct socket *geneve_create_sock(struct net *net, bool ipv6,
+ __be16 port)
+{
+ struct socket *sock;
+ struct udp_port_cfg udp_conf;
+ int err;
+
+ memset(&udp_conf, 0, sizeof(udp_conf));
+
+ if (ipv6) {
+ udp_conf.family = AF_INET6;
+ } else {
+ udp_conf.family = AF_INET;
+ udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+ }
+
+ udp_conf.local_udp_port = port;
+
+ /* Open UDP socket */
+ err = udp_sock_create(net, &udp_conf, &sock);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ return sock;
+}
+
+/* Create new listen socket if needed */
+static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
+ geneve_rcv_t *rcv, void *data,
+ bool ipv6)
+{
+ struct geneve_net *gn = net_generic(net, geneve_net_id);
+ struct geneve_sock *gs;
+ struct socket *sock;
+ struct udp_tunnel_sock_cfg tunnel_cfg;
+
+ gs = kzalloc(sizeof(*gs), GFP_KERNEL);
+ if (!gs)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&gs->del_work, geneve_del_work);
+
+ sock = geneve_create_sock(net, ipv6, port);
+ if (IS_ERR(sock)) {
+ kfree(gs);
+ return ERR_CAST(sock);
+ }
+
+ gs->sock = sock;
+ atomic_set(&gs->refcnt, 1);
+ gs->rcv = rcv;
+ gs->rcv_data = data;
+
+ /* Initialize the geneve udp offloads structure */
+ gs->udp_offloads.port = port;
+ gs->udp_offloads.callbacks.gro_receive = NULL;
+ gs->udp_offloads.callbacks.gro_complete = NULL;
+
+ spin_lock(&gn->sock_lock);
+ hlist_add_head_rcu(&gs->hlist, gs_head(net, port));
+ geneve_notify_add_rx_port(gs);
+ spin_unlock(&gn->sock_lock);
+
+ /* Mark socket as an encapsulation socket */
+ tunnel_cfg.sk_user_data = gs;
+ tunnel_cfg.encap_type = 1;
+ tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
+ tunnel_cfg.encap_destroy = NULL;
+ setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
+
+ return gs;
+}
+
+struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
+ geneve_rcv_t *rcv, void *data,
+ bool no_share, bool ipv6)
+{
+ struct geneve_sock *gs;
+
+ gs = geneve_socket_create(net, port, rcv, data, ipv6);
+ if (!IS_ERR(gs))
+ return gs;
+
+ if (no_share) /* Return error if sharing is not allowed. */
+ return ERR_PTR(-EINVAL);
+
+ gs = geneve_find_sock(net, port);
+ if (gs) {
+ if (gs->rcv == rcv)
+ atomic_inc(&gs->refcnt);
+ else
+ gs = ERR_PTR(-EBUSY);
+ } else {
+ gs = ERR_PTR(-EINVAL);
+ }
+
+ return gs;
+}
+EXPORT_SYMBOL_GPL(geneve_sock_add);
+
+void geneve_sock_release(struct geneve_sock *gs)
+{
+ if (!atomic_dec_and_test(&gs->refcnt))
+ return;
+
+ queue_work(geneve_wq, &gs->del_work);
+}
+EXPORT_SYMBOL_GPL(geneve_sock_release);
+
+static __net_init int geneve_init_net(struct net *net)
+{
+ struct geneve_net *gn = net_generic(net, geneve_net_id);
+ unsigned int h;
+
+ spin_lock_init(&gn->sock_lock);
+
+ for (h = 0; h < PORT_HASH_SIZE; ++h)
+ INIT_HLIST_HEAD(&gn->sock_list[h]);
+
+ return 0;
+}
+
+static struct pernet_operations geneve_net_ops = {
+ .init = geneve_init_net,
+ .exit = NULL,
+ .id = &geneve_net_id,
+ .size = sizeof(struct geneve_net),
+};
+
+static int __init geneve_init_module(void)
+{
+ int rc;
+
+ geneve_wq = alloc_workqueue("geneve", 0, 0);
+ if (!geneve_wq)
+ return -ENOMEM;
+
+ rc = register_pernet_subsys(&geneve_net_ops);
+ if (rc)
+ return rc;
+
+ pr_info("Geneve driver\n");
+
+ return 0;
+}
+late_initcall(geneve_init_module);
+
+static void __exit geneve_cleanup_module(void)
+{
+ destroy_workqueue(geneve_wq);
+}
+module_exit(geneve_cleanup_module);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>");
+MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic");
+MODULE_ALIAS_RTNL_LINK("geneve");
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 0485bf7f8f03..4a7b5b2a1ce3 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -98,7 +98,6 @@ EXPORT_SYMBOL_GPL(gre_build_header);
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err)
{
- unsigned int ip_hlen = ip_hdrlen(skb);
const struct gre_base_hdr *greh;
__be32 *options;
int hdr_len;
@@ -106,7 +105,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
return -EINVAL;
- greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
@@ -116,7 +115,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!pskb_may_pull(skb, hdr_len))
return -EINVAL;
- greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
tpi->proto = greh->protocol;
options = (__be32 *)(greh + 1);
@@ -125,6 +124,10 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
*csum_err = true;
return -EINVAL;
}
+
+ skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ null_compute_pseudo);
+
options++;
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f0bdd47bbbcb..a77729503071 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -15,13 +15,6 @@
#include <net/protocol.h>
#include <net/gre.h>
-static int gre_gso_send_check(struct sk_buff *skb)
-{
- if (!skb->encapsulation)
- return -EINVAL;
- return 0;
-}
-
static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -46,6 +39,9 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
SKB_GSO_IPIP)))
goto out;
+ if (!skb->encapsulation)
+ goto out;
+
if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
goto out;
@@ -74,7 +70,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
segs = skb_mac_gso_segment(skb, enc_features);
- if (!segs || IS_ERR(segs)) {
+ if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
goto out;
}
@@ -119,28 +115,6 @@ out:
return segs;
}
-/* Compute the whole skb csum in s/w and store it, then verify GRO csum
- * starting from gro_offset.
- */
-static __sum16 gro_skb_checksum(struct sk_buff *skb)
-{
- __sum16 sum;
-
- skb->csum = skb_checksum(skb, 0, skb->len, 0);
- NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum,
- csum_partial(skb->data, skb_gro_offset(skb), 0));
- sum = csum_fold(NAPI_GRO_CB(skb)->csum);
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
- if (unlikely(!sum) && !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- } else {
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- }
-
- return sum;
-}
-
static struct sk_buff **gre_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -192,22 +166,16 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
if (unlikely(!greh))
goto out_unlock;
}
- if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */
- __sum16 csum = 0;
-
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- csum = csum_fold(NAPI_GRO_CB(skb)->csum);
- /* Don't trust csum error calculated/reported by h/w */
- if (skb->ip_summed == CHECKSUM_NONE || csum != 0)
- csum = gro_skb_checksum(skb);
-
- /* GRE CSUM is the 1's complement of the 1's complement sum
- * of the GRE hdr plus payload so it should add up to 0xffff
- * (and 0 after csum_fold()) just like the IPv4 hdr csum.
- */
- if (csum)
+
+ /* Don't bother verifying checksum if we're going to flush anyway. */
+ if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush) {
+ if (skb_gro_checksum_simple_validate(skb))
goto out_unlock;
+
+ skb_gro_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ null_compute_pseudo);
}
+
flush = 0;
for (p = *head; p; p = p->next) {
@@ -284,7 +252,6 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload gre_offload = {
.callbacks = {
- .gso_send_check = gre_gso_send_check,
.gso_segment = gre_gso_segment,
.gro_receive = gre_gro_receive,
.gro_complete = gre_gro_complete,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 42b7bcf8045b..5882f584910e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -231,12 +231,62 @@ static inline void icmp_xmit_unlock(struct sock *sk)
spin_unlock_bh(&sk->sk_lock.slock);
}
+int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
+int sysctl_icmp_msgs_burst __read_mostly = 50;
+
+static struct {
+ spinlock_t lock;
+ u32 credit;
+ u32 stamp;
+} icmp_global = {
+ .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
+};
+
+/**
+ * icmp_global_allow - Are we allowed to send one more ICMP message ?
+ *
+ * Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec.
+ * Returns false if we reached the limit and can not send another packet.
+ * Note: called with BH disabled
+ */
+bool icmp_global_allow(void)
+{
+ u32 credit, delta, incr = 0, now = (u32)jiffies;
+ bool rc = false;
+
+ /* Check if token bucket is empty and cannot be refilled
+ * without taking the spinlock.
+ */
+ if (!icmp_global.credit) {
+ delta = min_t(u32, now - icmp_global.stamp, HZ);
+ if (delta < HZ / 50)
+ return false;
+ }
+
+ spin_lock(&icmp_global.lock);
+ delta = min_t(u32, now - icmp_global.stamp, HZ);
+ if (delta >= HZ / 50) {
+ incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+ if (incr)
+ icmp_global.stamp = now;
+ }
+ credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+ if (credit) {
+ credit--;
+ rc = true;
+ }
+ icmp_global.credit = credit;
+ spin_unlock(&icmp_global.lock);
+ return rc;
+}
+EXPORT_SYMBOL(icmp_global_allow);
+
/*
* Send an ICMP frame.
*/
-static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
- struct flowi4 *fl4, int type, int code)
+static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
+ struct flowi4 *fl4, int type, int code)
{
struct dst_entry *dst = &rt->dst;
bool rc = true;
@@ -253,8 +303,14 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
goto out;
/* Limit if icmp type is enabled in ratemask. */
- if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
- struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
+ if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
+ goto out;
+
+ rc = false;
+ if (icmp_global_allow()) {
+ struct inet_peer *peer;
+
+ peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
rc = inet_peer_xrlim_allow(peer,
net->ipv4.sysctl_icmp_ratelimit);
if (peer)
@@ -663,16 +719,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
/* Checkin full IP header plus 8 bytes of protocol to
* avoid additional coding at protocol handlers.
*/
- if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
+ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
+ ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
return;
+ }
raw_icmp_error(skb, protocol, info);
- rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, info);
- rcu_read_unlock();
}
static bool icmp_tag_validation(int proto)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index db710b059bab..fb70e3ecc3e4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -117,7 +117,7 @@
#define IGMP_V2_Unsolicited_Report_Interval (10*HZ)
#define IGMP_V3_Unsolicited_Report_Interval (1*HZ)
#define IGMP_Query_Response_Interval (10*HZ)
-#define IGMP_Unsolicited_Report_Count 2
+#define IGMP_Query_Robustness_Variable 2
#define IGMP_Initial_Report_Delay (1)
@@ -756,8 +756,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
{
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv :
- IGMP_Unsolicited_Report_Count;
+ in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv;
igmp_ifc_start_timer(in_dev, 1);
}
@@ -932,7 +931,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
in_dev->mr_qrv = ih3->qrv;
if (!group) { /* general query */
if (ih3->nsrcs)
- return false; /* no sources allowed */
+ return true; /* no sources allowed */
igmp_gq_start_timer(in_dev);
return false;
}
@@ -1086,8 +1085,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
- IGMP_Unsolicited_Report_Count;
+ pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1226,8 +1224,7 @@ static void igmp_group_added(struct ip_mc_list *im)
}
/* else, v3 */
- im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
- IGMP_Unsolicited_Report_Count;
+ im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
igmp_ifc_event(in_dev);
#endif
}
@@ -1321,8 +1318,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
atomic_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
- setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
- im->unsolicit_count = IGMP_Unsolicited_Report_Count;
+ setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
+ im->unsolicit_count = sysctl_igmp_qrv;
#endif
im->next_rcu = in_dev->mc_list;
@@ -1460,7 +1457,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
(unsigned long)in_dev);
setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
(unsigned long)in_dev);
- in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
+ in_dev->mr_qrv = sysctl_igmp_qrv;
#endif
spin_lock_init(&in_dev->mc_tomb_lock);
@@ -1474,6 +1471,9 @@ void ip_mc_up(struct in_device *in_dev)
ASSERT_RTNL();
+#ifdef CONFIG_IP_MULTICAST
+ in_dev->mr_qrv = sysctl_igmp_qrv;
+#endif
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
for_each_pmc_rtnl(in_dev, pmc)
@@ -1540,7 +1540,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
*/
int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
-
+#ifdef CONFIG_IP_MULTICAST
+int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable;
+#endif
static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
__be32 *psfsrc)
@@ -1575,8 +1577,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
- IGMP_Unsolicited_Report_Count;
+ psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1639,8 +1640,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
- IGMP_Unsolicited_Report_Count;
+ pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
in_dev->mr_ifc_count = pmc->crcount;
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -1818,8 +1818,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
- IGMP_Unsolicited_Report_Count;
+ pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv;
in_dev->mr_ifc_count = pmc->crcount;
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2539,7 +2538,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
querier = "NONE";
#endif
- if (rcu_dereference(state->in_dev->mc_list) == im) {
+ if (rcu_access_pointer(state->in_dev->mc_list) == im) {
seq_printf(seq, "%d\t%-10s: %5d %7s\n",
state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3b01959bf4bb..9eb89f3f0ee4 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,12 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
+#define INETFRAGS_EVICT_BUCKETS 128
+#define INETFRAGS_EVICT_MAX 512
+
+/* don't rebuild inetfrag table with new secret more often than this */
+#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
+
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -46,24 +52,39 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);
-static void inet_frag_secret_rebuild(unsigned long dummy)
+static unsigned int
+inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
+{
+ return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
+}
+
+static bool inet_frag_may_rebuild(struct inet_frags *f)
+{
+ return time_after(jiffies,
+ f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
+}
+
+static void inet_frag_secret_rebuild(struct inet_frags *f)
{
- struct inet_frags *f = (struct inet_frags *)dummy;
- unsigned long now = jiffies;
int i;
- /* Per bucket lock NOT needed here, due to write lock protection */
- write_lock(&f->lock);
+ write_seqlock_bh(&f->rnd_seqlock);
+
+ if (!inet_frag_may_rebuild(f))
+ goto out;
get_random_bytes(&f->rnd, sizeof(u32));
+
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
struct hlist_node *n;
hb = &f->hash[i];
+ spin_lock(&hb->chain_lock);
+
hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = f->hashfn(q);
+ unsigned int hval = inet_frag_hashfn(f, q);
if (hval != i) {
struct inet_frag_bucket *hb_dest;
@@ -72,76 +93,200 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
/* Relink to new hash chain. */
hb_dest = &f->hash[hval];
+
+ /* This is the only place where we take
+ * another chain_lock while already holding
+ * one. As this will not run concurrently,
+ * we cannot deadlock on hb_dest lock below, if its
+ * already locked it will be released soon since
+ * other caller cannot be waiting for hb lock
+ * that we've taken above.
+ */
+ spin_lock_nested(&hb_dest->chain_lock,
+ SINGLE_DEPTH_NESTING);
hlist_add_head(&q->list, &hb_dest->chain);
+ spin_unlock(&hb_dest->chain_lock);
}
}
+ spin_unlock(&hb->chain_lock);
+ }
+
+ f->rebuild = false;
+ f->last_rebuild_jiffies = jiffies;
+out:
+ write_sequnlock_bh(&f->rnd_seqlock);
+}
+
+static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
+{
+ return q->net->low_thresh == 0 ||
+ frag_mem_limit(q->net) >= q->net->low_thresh;
+}
+
+static unsigned int
+inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
+{
+ struct inet_frag_queue *fq;
+ struct hlist_node *n;
+ unsigned int evicted = 0;
+ HLIST_HEAD(expired);
+
+evict_again:
+ spin_lock(&hb->chain_lock);
+
+ hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
+ if (!inet_fragq_should_evict(fq))
+ continue;
+
+ if (!del_timer(&fq->timer)) {
+ /* q expiring right now thus increment its refcount so
+ * it won't be freed under us and wait until the timer
+ * has finished executing then destroy it
+ */
+ atomic_inc(&fq->refcnt);
+ spin_unlock(&hb->chain_lock);
+ del_timer_sync(&fq->timer);
+ WARN_ON(atomic_read(&fq->refcnt) != 1);
+ inet_frag_put(fq, f);
+ goto evict_again;
+ }
+
+ fq->flags |= INET_FRAG_EVICTED;
+ hlist_del(&fq->list);
+ hlist_add_head(&fq->list, &expired);
+ ++evicted;
}
- write_unlock(&f->lock);
- mod_timer(&f->secret_timer, now + f->secret_interval);
+ spin_unlock(&hb->chain_lock);
+
+ hlist_for_each_entry_safe(fq, n, &expired, list)
+ f->frag_expire((unsigned long) fq);
+
+ return evicted;
}
-void inet_frags_init(struct inet_frags *f)
+static void inet_frag_worker(struct work_struct *work)
+{
+ unsigned int budget = INETFRAGS_EVICT_BUCKETS;
+ unsigned int i, evicted = 0;
+ struct inet_frags *f;
+
+ f = container_of(work, struct inet_frags, frags_work);
+
+ BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
+
+ local_bh_disable();
+
+ for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+ evicted += inet_evict_bucket(f, &f->hash[i]);
+ i = (i + 1) & (INETFRAGS_HASHSZ - 1);
+ if (evicted > INETFRAGS_EVICT_MAX)
+ break;
+ }
+
+ f->next_bucket = i;
+
+ local_bh_enable();
+
+ if (f->rebuild && inet_frag_may_rebuild(f))
+ inet_frag_secret_rebuild(f);
+}
+
+static void inet_frag_schedule_worker(struct inet_frags *f)
+{
+ if (unlikely(!work_pending(&f->frags_work)))
+ schedule_work(&f->frags_work);
+}
+
+int inet_frags_init(struct inet_frags *f)
{
int i;
+ INIT_WORK(&f->frags_work, inet_frag_worker);
+
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
struct inet_frag_bucket *hb = &f->hash[i];
spin_lock_init(&hb->chain_lock);
INIT_HLIST_HEAD(&hb->chain);
}
- rwlock_init(&f->lock);
- setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
- (unsigned long)f);
- f->secret_timer.expires = jiffies + f->secret_interval;
- add_timer(&f->secret_timer);
+ seqlock_init(&f->rnd_seqlock);
+ f->last_rebuild_jiffies = 0;
+ f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
+ NULL);
+ if (!f->frags_cachep)
+ return -ENOMEM;
+
+ return 0;
}
EXPORT_SYMBOL(inet_frags_init);
void inet_frags_init_net(struct netns_frags *nf)
{
- nf->nqueues = 0;
init_frag_mem_limit(nf);
- INIT_LIST_HEAD(&nf->lru_list);
- spin_lock_init(&nf->lru_lock);
}
EXPORT_SYMBOL(inet_frags_init_net);
void inet_frags_fini(struct inet_frags *f)
{
- del_timer(&f->secret_timer);
+ cancel_work_sync(&f->frags_work);
+ kmem_cache_destroy(f->frags_cachep);
}
EXPORT_SYMBOL(inet_frags_fini);
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
{
- nf->low_thresh = 0;
+ unsigned int seq;
+ int i;
+ nf->low_thresh = 0;
local_bh_disable();
- inet_frag_evictor(nf, f, true);
+
+evict_again:
+ seq = read_seqbegin(&f->rnd_seqlock);
+
+ for (i = 0; i < INETFRAGS_HASHSZ ; i++)
+ inet_evict_bucket(f, &f->hash[i]);
+
+ if (read_seqretry(&f->rnd_seqlock, seq))
+ goto evict_again;
+
local_bh_enable();
percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock)
{
struct inet_frag_bucket *hb;
- unsigned int hash;
+ unsigned int seq, hash;
+
+ restart:
+ seq = read_seqbegin(&f->rnd_seqlock);
- read_lock(&f->lock);
- hash = f->hashfn(fq);
+ hash = inet_frag_hashfn(f, fq);
hb = &f->hash[hash];
spin_lock(&hb->chain_lock);
+ if (read_seqretry(&f->rnd_seqlock, seq)) {
+ spin_unlock(&hb->chain_lock);
+ goto restart;
+ }
+
+ return hb;
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ struct inet_frag_bucket *hb;
+
+ hb = get_frag_bucket_locked(fq, f);
hlist_del(&fq->list);
spin_unlock(&hb->chain_lock);
-
- read_unlock(&f->lock);
- inet_frag_lru_del(fq);
}
void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -149,30 +294,29 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
if (del_timer(&fq->timer))
atomic_dec(&fq->refcnt);
- if (!(fq->last_in & INET_FRAG_COMPLETE)) {
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f);
atomic_dec(&fq->refcnt);
- fq->last_in |= INET_FRAG_COMPLETE;
+ fq->flags |= INET_FRAG_COMPLETE;
}
}
EXPORT_SYMBOL(inet_frag_kill);
static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
- struct sk_buff *skb)
+ struct sk_buff *skb)
{
if (f->skb_free)
f->skb_free(skb);
kfree_skb(skb);
}
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
- int *work)
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
{
struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
- WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
+ WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
/* Release all fragment data. */
@@ -186,87 +330,32 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
fp = xp;
}
sum = sum_truesize + f->qsize;
- if (work)
- *work -= sum;
sub_frag_mem_limit(q, sum);
if (f->destructor)
f->destructor(q);
- kfree(q);
-
+ kmem_cache_free(f->frags_cachep, q);
}
EXPORT_SYMBOL(inet_frag_destroy);
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
-{
- struct inet_frag_queue *q;
- int work, evicted = 0;
-
- if (!force) {
- if (frag_mem_limit(nf) <= nf->high_thresh)
- return 0;
- }
-
- work = frag_mem_limit(nf) - nf->low_thresh;
- while (work > 0 || force) {
- spin_lock(&nf->lru_lock);
-
- if (list_empty(&nf->lru_list)) {
- spin_unlock(&nf->lru_lock);
- break;
- }
-
- q = list_first_entry(&nf->lru_list,
- struct inet_frag_queue, lru_list);
- atomic_inc(&q->refcnt);
- /* Remove q from list to avoid several CPUs grabbing it */
- list_del_init(&q->lru_list);
-
- spin_unlock(&nf->lru_lock);
-
- spin_lock(&q->lock);
- if (!(q->last_in & INET_FRAG_COMPLETE))
- inet_frag_kill(q, f);
- spin_unlock(&q->lock);
-
- if (atomic_dec_and_test(&q->refcnt))
- inet_frag_destroy(q, f, &work);
- evicted++;
- }
-
- return evicted;
-}
-EXPORT_SYMBOL(inet_frag_evictor);
-
static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
- struct inet_frag_queue *qp_in, struct inet_frags *f,
- void *arg)
+ struct inet_frag_queue *qp_in,
+ struct inet_frags *f,
+ void *arg)
{
- struct inet_frag_bucket *hb;
+ struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
struct inet_frag_queue *qp;
- unsigned int hash;
-
- read_lock(&f->lock); /* Protects against hash rebuild */
- /*
- * While we stayed w/o the lock other CPU could update
- * the rnd seed, so we need to re-calculate the hash
- * chain. Fortunatelly the qp_in can be used to get one.
- */
- hash = f->hashfn(qp_in);
- hb = &f->hash[hash];
- spin_lock(&hb->chain_lock);
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
- * such entry could be created on other cpu, while we
- * released the hash bucket lock.
+ * such entry could have been created on other cpu before
+ * we acquired hash bucket lock.
*/
hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
- qp_in->last_in |= INET_FRAG_COMPLETE;
+ qp_in->flags |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
}
@@ -278,19 +367,24 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
- inet_frag_lru_add(nf, qp);
+
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return qp;
}
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
- struct inet_frags *f, void *arg)
+ struct inet_frags *f,
+ void *arg)
{
struct inet_frag_queue *q;
- q = kzalloc(f->qsize, GFP_ATOMIC);
+ if (frag_mem_limit(nf) > nf->high_thresh) {
+ inet_frag_schedule_worker(f);
+ return NULL;
+ }
+
+ q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (q == NULL)
return NULL;
@@ -301,13 +395,13 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
- INIT_LIST_HEAD(&q->lru_list);
return q;
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f, void *arg)
+ struct inet_frags *f,
+ void *arg)
{
struct inet_frag_queue *q;
@@ -319,13 +413,17 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
}
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key, unsigned int hash)
- __releases(&f->lock)
+ struct inet_frags *f, void *key,
+ unsigned int hash)
{
struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
int depth = 0;
+ if (frag_mem_limit(nf) > nf->low_thresh)
+ inet_frag_schedule_worker(f);
+
+ hash &= (INETFRAGS_HASHSZ - 1);
hb = &f->hash[hash];
spin_lock(&hb->chain_lock);
@@ -333,18 +431,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
return q;
}
depth++;
}
spin_unlock(&hb->chain_lock);
- read_unlock(&f->lock);
if (depth <= INETFRAGS_MAXDEPTH)
return inet_frag_create(nf, f, key);
- else
- return ERR_PTR(-ENOBUFS);
+
+ if (inet_frag_may_rebuild(f)) {
+ if (!f->rebuild)
+ f->rebuild = true;
+ inet_frag_schedule_worker(f);
+ }
+
+ return ERR_PTR(-ENOBUFS);
}
EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 43116e8c8e13..9111a4e22155 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -229,7 +229,7 @@ begin:
}
} else if (score == hiscore && reuseport) {
matches++;
- if (((u64)phash * matches) >> 32 == 0)
+ if (reciprocal_scale(phash, matches) == 0)
result = sk;
phash = next_pseudo_random32(phash);
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index bd5f5928167d..241afd743d2c 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,29 +72,10 @@ void inet_peer_base_init(struct inet_peer_base *bp)
{
bp->root = peer_avl_empty_rcu;
seqlock_init(&bp->lock);
- bp->flush_seq = ~0U;
bp->total = 0;
}
EXPORT_SYMBOL_GPL(inet_peer_base_init);
-static atomic_t v4_seq = ATOMIC_INIT(0);
-static atomic_t v6_seq = ATOMIC_INIT(0);
-
-static atomic_t *inetpeer_seq_ptr(int family)
-{
- return (family == AF_INET ? &v4_seq : &v6_seq);
-}
-
-static inline void flush_check(struct inet_peer_base *base, int family)
-{
- atomic_t *fp = inetpeer_seq_ptr(family);
-
- if (unlikely(base->flush_seq != atomic_read(fp))) {
- inetpeer_invalidate_tree(base);
- base->flush_seq = atomic_read(fp);
- }
-}
-
#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
/* Exported for sysctl_net_ipv4. */
@@ -444,8 +425,6 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
unsigned int sequence;
int invalidated, gccnt = 0;
- flush_check(base, daddr->family);
-
/* Attempt a lockless lookup first.
* Because of a concurrent writer, we might not find an existing entry.
*/
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ed32313e307c..2811cc18701a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -55,6 +55,7 @@
*/
static int sysctl_ipfrag_max_dist __read_mostly = 64;
+static const char ip_frag_cache_name[] = "ip4-frags";
struct ipfrag_skb_cb
{
@@ -86,11 +87,6 @@ static inline u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
-int ip_frag_nqueues(struct net *net)
-{
- return net->ipv4.frags.nqueues;
-}
-
int ip_frag_mem(struct net *net)
{
return sum_frag_mem_limit(&net->ipv4.frags);
@@ -109,21 +105,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
return jhash_3words((__force u32)id << 16 | prot,
(__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
+ ip4_frags.rnd);
}
-static unsigned int ip4_hashfn(struct inet_frag_queue *q)
+static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
{
- struct ipq *ipq;
+ const struct ipq *ipq;
ipq = container_of(q, struct ipq, q);
return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
}
-static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
+static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
{
- struct ipq *qp;
- struct ip4_create_arg *arg = a;
+ const struct ipq *qp;
+ const struct ip4_create_arg *arg = a;
qp = container_of(q, struct ipq, q);
return qp->id == arg->iph->id &&
@@ -133,14 +129,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
qp->user == arg->user;
}
-static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
- struct ip4_create_arg *arg = a;
+ const struct ip4_create_arg *arg = a;
qp->protocol = arg->iph->protocol;
qp->id = arg->iph->id;
@@ -177,18 +173,6 @@ static void ipq_kill(struct ipq *ipq)
inet_frag_kill(&ipq->q, &ip4_frags);
}
-/* Memory limiting on fragments. Evictor trashes the oldest
- * fragment queue until we are back under the threshold.
- */
-static void ip_evictor(struct net *net)
-{
- int evicted;
-
- evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
- if (evicted)
- IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
@@ -202,19 +186,22 @@ static void ip_expire(unsigned long arg)
spin_lock(&qp->q.lock);
- if (qp->q.last_in & INET_FRAG_COMPLETE)
+ if (qp->q.flags & INET_FRAG_COMPLETE)
goto out;
ipq_kill(qp);
-
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
- if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
+ if (!(qp->q.flags & INET_FRAG_EVICTED)) {
struct sk_buff *head = qp->q.fragments;
const struct iphdr *iph;
int err;
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
+ goto out;
+
rcu_read_lock();
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
@@ -227,8 +214,7 @@ static void ip_expire(unsigned long arg)
if (err)
goto out_rcu_unlock;
- /*
- * Only an end host needs to send an ICMP
+ /* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
if (qp->user == IP_DEFRAG_AF_PACKET ||
@@ -237,7 +223,6 @@ static void ip_expire(unsigned long arg)
(skb_rtable(head)->rt_type != RTN_LOCAL)))
goto out_rcu_unlock;
-
/* Send an ICMP "Fragment Reassembly Timeout" message. */
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
out_rcu_unlock:
@@ -260,7 +245,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
arg.iph = iph;
arg.user = user;
- read_lock(&ip4_frags.lock);
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -319,7 +303,7 @@ static int ip_frag_reinit(struct ipq *qp)
} while (fp);
sub_frag_mem_limit(&qp->q, sum_truesize);
- qp->q.last_in = 0;
+ qp->q.flags = 0;
qp->q.len = 0;
qp->q.meat = 0;
qp->q.fragments = NULL;
@@ -340,7 +324,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
int err = -ENOENT;
u8 ecn;
- if (qp->q.last_in & INET_FRAG_COMPLETE)
+ if (qp->q.flags & INET_FRAG_COMPLETE)
goto err;
if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -367,9 +351,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* or have different end, the segment is corrupted.
*/
if (end < qp->q.len ||
- ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
+ ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
goto err;
- qp->q.last_in |= INET_FRAG_LAST_IN;
+ qp->q.flags |= INET_FRAG_LAST_IN;
qp->q.len = end;
} else {
if (end&7) {
@@ -379,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */
- if (qp->q.last_in & INET_FRAG_LAST_IN)
+ if (qp->q.flags & INET_FRAG_LAST_IN)
goto err;
qp->q.len = end;
}
@@ -488,13 +472,13 @@ found:
qp->ecn |= ecn;
add_frag_mem_limit(&qp->q, skb->truesize);
if (offset == 0)
- qp->q.last_in |= INET_FRAG_FIRST_IN;
+ qp->q.flags |= INET_FRAG_FIRST_IN;
if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
skb->len + ihl > qp->q.max_size)
qp->q.max_size = skb->len + ihl;
- if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
qp->q.meat == qp->q.len) {
unsigned long orefdst = skb->_skb_refdst;
@@ -505,7 +489,6 @@ found:
}
skb_dst_drop(skb);
- inet_frag_lru_move(&qp->q);
return -EINPROGRESS;
err:
@@ -655,9 +638,6 @@ int ip_defrag(struct sk_buff *skb, u32 user)
net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
- /* Start by cleaning up the memory. */
- ip_evictor(net);
-
/* Lookup (or create) queue header */
if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
int ret;
@@ -721,14 +701,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.data = &init_net.ipv4.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ipv4.frags.high_thresh
},
{
.procname = "ipfrag_time",
@@ -740,10 +723,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
{ }
};
+/* secret interval has been deprecated */
+static int ip4_frags_secret_interval_unused;
static struct ctl_table ip4_frags_ctl_table[] = {
{
.procname = "ipfrag_secret_interval",
- .data = &ip4_frags.secret_interval,
+ .data = &ip4_frags_secret_interval_unused,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -771,7 +756,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
goto err_alloc;
table[0].data = &net->ipv4.frags.high_thresh;
+ table[0].extra1 = &net->ipv4.frags.low_thresh;
+ table[0].extra2 = &init_net.ipv4.frags.high_thresh;
table[1].data = &net->ipv4.frags.low_thresh;
+ table[1].extra2 = &net->ipv4.frags.high_thresh;
table[2].data = &net->ipv4.frags.timeout;
/* Don't export sysctls to unprivileged users */
@@ -802,7 +790,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
kfree(table);
}
-static void ip4_frags_ctl_register(void)
+static void __init ip4_frags_ctl_register(void)
{
register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
@@ -816,7 +804,7 @@ static inline void ip4_frags_ns_ctl_unregister(struct net *net)
{
}
-static inline void ip4_frags_ctl_register(void)
+static inline void __init ip4_frags_ctl_register(void)
{
}
#endif
@@ -873,6 +861,7 @@ void __init ipfrag_init(void)
ip4_frags.qsize = sizeof(struct ipq);
ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
- ip4_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&ip4_frags);
+ ip4_frags.frags_cache_name = ip_frag_cache_name;
+ if (inet_frags_init(&ip4_frags))
+ panic("IP: failed to allocate ip4_frags cache\n");
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9b842544aea3..12055fdbe716 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -239,7 +239,9 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
tpi.seq = htonl(tunnel->o_seqno);
/* Push GRE header. */
- gre_build_header(skb, &tpi, tunnel->hlen);
+ gre_build_header(skb, &tpi, tunnel->tun_hlen);
+
+ skb_set_inner_protocol(skb, tpi.proto);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -310,7 +312,7 @@ out:
static int ipgre_tunnel_ioctl(struct net_device *dev,
struct ifreq *ifr, int cmd)
{
- int err = 0;
+ int err;
struct ip_tunnel_parm p;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
@@ -470,13 +472,18 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
+ int t_hlen;
tunnel = netdev_priv(dev);
- tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
tunnel->parms.iph.protocol = IPPROTO_GRE;
- dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+
+ t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+ dev->mtu = ETH_DATA_LEN - t_hlen - 4;
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -503,7 +510,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &iph->daddr, 4);
dev->flags = IFF_NOARP;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
dev->addr_len = 4;
if (iph->daddr) {
@@ -628,6 +635,40 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
parms->iph.frag_off = htons(IP_DF);
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipgre_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_GRE_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
static int gre_tap_init(struct net_device *dev)
{
__gre_tunnel_init(dev);
@@ -657,6 +698,15 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipgre_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipgre_netlink_parms(data, tb, &p);
return ip_tunnel_newlink(dev, tb, &p);
@@ -666,6 +716,15 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipgre_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipgre_netlink_parms(data, tb, &p);
return ip_tunnel_changelink(dev, tb, &p);
@@ -694,6 +753,14 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_GRE_PMTUDISC */
nla_total_size(1) +
+ /* IFLA_GRE_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -714,6 +781,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_PMTUDISC,
!!(p->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
+ t->encap.type) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT,
+ t->encap.sport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
+ t->encap.dport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
+ t->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -731,6 +809,10 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ad382499bace..5b3d91be2db0 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -87,17 +87,15 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
* NOTE: dopt cannot point to skb.
*/
-int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
+ const struct ip_options *sopt)
{
- const struct ip_options *sopt;
unsigned char *sptr, *dptr;
int soffset, doffset;
int optlen;
memset(dopt, 0, sizeof(struct ip_options));
- sopt = &(IPCB(skb)->opt);
-
if (sopt->optlen == 0)
return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8d3b6b0e9857..e35b71289156 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -516,7 +516,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
hlen = iph->ihl * 4;
mtu = mtu - hlen; /* Size of data space */
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge)
mtu -= nf_bridge_mtu_reduction(skb);
#endif
@@ -855,11 +855,15 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ u32 tskey = 0;
skb = skb_peek_tail(queue);
exthdrlen = !skb ? rt->dst.header_len : 0;
mtu = cork->fragsize;
+ if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ tskey = sk->sk_tskey++;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -962,10 +966,6 @@ alloc_new_skb:
sk->sk_allocation);
if (unlikely(skb == NULL))
err = -ENOBUFS;
- else
- /* only the initial fragment is
- time stamped */
- cork->tx_flags = 0;
}
if (skb == NULL)
goto error;
@@ -976,7 +976,12 @@ alloc_new_skb:
skb->ip_summed = csummode;
skb->csum = 0;
skb_reserve(skb, hh_len);
+
+ /* only the initial fragment is time stamped */
skb_shinfo(skb)->tx_flags = cork->tx_flags;
+ cork->tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
/*
* Find where to start putting bytes.
@@ -1517,8 +1522,10 @@ static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
.uc_ttl = -1,
};
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
- __be32 saddr, const struct ip_reply_arg *arg,
+void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+ const struct ip_options *sopt,
+ __be32 daddr, __be32 saddr,
+ const struct ip_reply_arg *arg,
unsigned int len)
{
struct ip_options_data replyopts;
@@ -1529,7 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
struct sock *sk;
struct inet_sock *inet;
- if (ip_options_echo(&replyopts.opt.opt, skb))
+ if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
return;
ipc.addr = daddr;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64741b938632..c373a9ad4555 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -303,7 +303,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
}
/* dont let ip_call_ra_chain() use sk again */
ra->sk = NULL;
- rcu_assign_pointer(*rap, ra->next);
+ RCU_INIT_POINTER(*rap, ra->next);
spin_unlock_bh(&ip_ra_lock);
if (ra->destructor)
@@ -325,7 +325,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
new_ra->sk = sk;
new_ra->destructor = destructor;
- new_ra->next = ra;
+ RCU_INIT_POINTER(new_ra->next, ra);
rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
spin_unlock_bh(&ip_ra_lock);
@@ -405,7 +405,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct sock_exterr_skb *serr;
- struct sk_buff *skb, *skb2;
+ struct sk_buff *skb;
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct {
struct sock_extended_err ee;
@@ -415,7 +415,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int copied;
err = -EAGAIN;
- skb = skb_dequeue(&sk->sk_error_queue);
+ skb = sock_dequeue_err_skb(sk);
if (skb == NULL)
goto out;
@@ -462,17 +462,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
msg->msg_flags |= MSG_ERRQUEUE;
err = copied;
- /* Reset and regenerate socket error */
- spin_lock_bh(&sk->sk_error_queue.lock);
- sk->sk_err = 0;
- skb2 = skb_peek(&sk->sk_error_queue);
- if (skb2 != NULL) {
- sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
- spin_unlock_bh(&sk->sk_error_queue.lock);
- sk->sk_error_report(sk);
- } else
- spin_unlock_bh(&sk->sk_error_queue.lock);
-
out_free_skb:
kfree_skb(skb);
out:
@@ -1319,7 +1308,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (sk->sk_type != SOCK_STREAM)
return -ENOPROTOOPT;
- msg.msg_control = optval;
+ msg.msg_control = (__force void *) optval;
msg.msg_controllen = len;
msg.msg_flags = flags;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6f9de61dce5f..0bb8e141eacc 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,8 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
+#include <net/udp.h>
+#include <net/gue.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -69,23 +71,25 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
}
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
- struct dst_entry *dst)
+ struct dst_entry *dst, __be32 saddr)
{
struct dst_entry *old_dst;
dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
+ idst->saddr = saddr;
}
-static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
+static noinline void tunnel_dst_set(struct ip_tunnel *t,
+ struct dst_entry *dst, __be32 saddr)
{
- __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
+ __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
static void tunnel_dst_reset(struct ip_tunnel *t)
{
- tunnel_dst_set(t, NULL);
+ tunnel_dst_set(t, NULL, 0);
}
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
@@ -93,20 +97,25 @@ void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
int i;
for_each_possible_cpu(i)
- __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
+ __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
-static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
+static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
+ u32 cookie, __be32 *saddr)
{
+ struct ip_tunnel_dst *idst;
struct dst_entry *dst;
rcu_read_lock();
- dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+ idst = raw_cpu_ptr(t->dst_cache);
+ dst = rcu_dereference(idst->dst);
if (dst && !atomic_inc_not_zero(&dst->__refcnt))
dst = NULL;
if (dst) {
- if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+ if (!dst->obsolete || dst->ops->check(dst, cookie)) {
+ *saddr = idst->saddr;
+ } else {
tunnel_dst_reset(t);
dst_release(dst);
dst = NULL;
@@ -305,7 +314,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
}
ASSERT_RTNL();
- dev = alloc_netdev(ops->priv_size, name, ops->setup);
+ dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
if (!dev) {
err = -ENOMEM;
goto failed;
@@ -367,7 +376,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
- tunnel_dst_set(tunnel, &rt->dst);
+ tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
@@ -480,6 +489,103 @@ drop:
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
+static int ip_encap_hlen(struct ip_tunnel_encap *e)
+{
+ switch (e->type) {
+ case TUNNEL_ENCAP_NONE:
+ return 0;
+ case TUNNEL_ENCAP_FOU:
+ return sizeof(struct udphdr);
+ case TUNNEL_ENCAP_GUE:
+ return sizeof(struct udphdr) + sizeof(struct guehdr);
+ default:
+ return -EINVAL;
+ }
+}
+
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+ struct ip_tunnel_encap *ipencap)
+{
+ int hlen;
+
+ memset(&t->encap, 0, sizeof(t->encap));
+
+ hlen = ip_encap_hlen(ipencap);
+ if (hlen < 0)
+ return hlen;
+
+ t->encap.type = ipencap->type;
+ t->encap.sport = ipencap->sport;
+ t->encap.dport = ipencap->dport;
+ t->encap.flags = ipencap->flags;
+
+ t->encap_hlen = hlen;
+ t->hlen = t->encap_hlen + t->tun_hlen;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
+
+static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
+{
+ struct udphdr *uh;
+ __be16 sport;
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Get length and hash before making space in skb */
+
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+
+ skb_push(skb, hdr_len);
+
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ if (e->type == TUNNEL_ENCAP_GUE) {
+ struct guehdr *guehdr = (struct guehdr *)&uh[1];
+
+ guehdr->version = 0;
+ guehdr->hlen = 0;
+ guehdr->flags = 0;
+ guehdr->next_hdr = *protocol;
+ }
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+
+ return 0;
+}
+
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ switch (t->encap.type) {
+ case TUNNEL_ENCAP_NONE:
+ return 0;
+ case TUNNEL_ENCAP_FOU:
+ case TUNNEL_ENCAP_GUE:
+ return fou_build_header(skb, &t->encap, t->encap_hlen,
+ protocol, fl4);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(ip_tunnel_encap);
+
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rtable *rt, __be16 df)
{
@@ -529,7 +635,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
}
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
- const struct iphdr *tnl_params, const u8 protocol)
+ const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *inner_iph;
@@ -610,7 +716,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
- rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
+ if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+ goto tx_error;
+
+ rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -620,7 +729,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
if (connected)
- tunnel_dst_set(tunnel, &rt->dst);
+ tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
}
if (rt->dst.dev == dev) {
@@ -663,7 +772,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
df |= (inner_iph->frag_off&htons(IP_DF));
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
- + rt->dst.header_len;
+ + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
@@ -757,9 +866,14 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
- if (!t && (cmd == SIOCADDTUNNEL)) {
- t = ip_tunnel_create(net, itn, p);
- err = PTR_ERR_OR_ZERO(t);
+ if (cmd == SIOCADDTUNNEL) {
+ if (!t) {
+ t = ip_tunnel_create(net, itn, p);
+ err = PTR_ERR_OR_ZERO(t);
+ break;
+ }
+
+ err = -EEXIST;
break;
}
if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index b8960f3527f3..3e861011e4a3 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -364,7 +364,7 @@ static int vti_tunnel_init(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_LLTX;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
return ip_tunnel_init(dev);
}
@@ -534,40 +534,28 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
static int __init vti_init(void)
{
+ const char *msg;
int err;
- pr_info("IPv4 over IPSec tunneling driver\n");
+ pr_info("IPv4 over IPsec tunneling driver\n");
+ msg = "tunnel device";
err = register_pernet_device(&vti_net_ops);
if (err < 0)
- return err;
- err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
- if (err < 0) {
- unregister_pernet_device(&vti_net_ops);
- pr_info("vti init: can't register tunnel\n");
-
- return err;
- }
+ goto pernet_dev_failed;
+ msg = "tunnel protocols";
+ err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
+ if (err < 0)
+ goto xfrm_proto_esp_failed;
err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
- if (err < 0) {
- xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
- unregister_pernet_device(&vti_net_ops);
- pr_info("vti init: can't register tunnel\n");
-
- return err;
- }
-
+ if (err < 0)
+ goto xfrm_proto_ah_failed;
err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
- if (err < 0) {
- xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
- xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
- unregister_pernet_device(&vti_net_ops);
- pr_info("vti init: can't register tunnel\n");
-
- return err;
- }
+ if (err < 0)
+ goto xfrm_proto_comp_failed;
+ msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -576,23 +564,23 @@ static int __init vti_init(void)
rtnl_link_failed:
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
unregister_pernet_device(&vti_net_ops);
+pernet_dev_failed:
+ pr_err("vti init: failed to register %s\n", msg);
return err;
}
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
- if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP))
- pr_info("vti close: can't deregister tunnel\n");
- if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH))
- pr_info("vti close: can't deregister tunnel\n");
- if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP))
- pr_info("vti close: can't deregister tunnel\n");
-
-
+ xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b3e86ea7b71b..648fa1490ea7 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -143,8 +143,6 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */
__be32 root_server_addr = NONE; /* Address of NFS server */
u8 root_server_path[256] = { 0, }; /* Path to mount as root */
-__be32 ic_dev_xid; /* Device under configuration */
-
/* vendor class identifier */
static char vendor_class_identifier[253] __initdata;
@@ -264,7 +262,8 @@ static int __init ic_open_devs(void)
/* wait for a carrier on at least one device */
start = jiffies;
next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
- while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
+ while (time_before(jiffies, start +
+ msecs_to_jiffies(CONF_CARRIER_TIMEOUT))) {
int wait, elapsed;
for_each_netdev(&init_net, dev)
@@ -654,6 +653,7 @@ static struct packet_type bootp_packet_type __initdata = {
.func = ic_bootp_recv,
};
+static __be32 ic_dev_xid; /* Device under configuration */
/*
* Initialize DHCP/BOOTP extension fields in the request.
@@ -1218,10 +1218,10 @@ static int __init ic_dynamic(void)
get_random_bytes(&timeout, sizeof(timeout));
timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
for (;;) {
+#ifdef IPCONFIG_BOOTP
/* Track the device we are configuring */
ic_dev_xid = d->xid;
-#ifdef IPCONFIG_BOOTP
if (do_bootp && (d->able & IC_BOOTP))
ic_bootp_send_if(d, jiffies - start_jiffies);
#endif
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 62eaa005e146..37096d64730e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
return NETDEV_TX_OK;
@@ -287,7 +289,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_LLTX;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
dev->features |= IPIP_FEATURES;
dev->hw_features |= IPIP_FEATURES;
@@ -301,7 +303,8 @@ static int ipip_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
- tunnel->hlen = 0;
+ tunnel->tun_hlen = 0;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
tunnel->parms.iph.protocol = IPPROTO_IPIP;
return ip_tunnel_init(dev);
}
@@ -340,10 +343,53 @@ static void ipip_netlink_parms(struct nlattr *data[],
parms->iph.frag_off = htons(IP_DF);
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipip_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipip_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipip_netlink_parms(data, &p);
return ip_tunnel_newlink(dev, tb, &p);
@@ -353,6 +399,15 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipip_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipip_netlink_parms(data, &p);
@@ -378,6 +433,14 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_PMTUDISC */
nla_total_size(1) +
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -394,6 +457,17 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -407,6 +481,10 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 65bcaa789043..c8034587859d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -500,7 +500,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
else
sprintf(name, "pimreg%u", mrt->id);
- dev = alloc_netdev(0, name, reg_vif_setup);
+ dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
if (dev == NULL)
return NULL;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a26ce035e3fa..4c019d5c3f57 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT
If unsure, say Y.
+config NF_LOG_ARP
+ tristate "ARP packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
+config NF_LOG_IPV4
+ tristate "IPv4 packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
config NF_TABLES_IPV4
depends on NF_TABLES
tristate "IPv4 nf_tables support"
@@ -51,9 +61,36 @@ config NFT_CHAIN_ROUTE_IPV4
fields such as the source, destination, type of service and
the packet mark.
+config NF_REJECT_IPV4
+ tristate "IPv4 packet rejection"
+ default m if NETFILTER_ADVANCED=n
+
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ select NF_REJECT_IPV4
+ default NFT_REJECT
+ tristate
+
+config NF_TABLES_ARP
+ depends on NF_TABLES
+ tristate "ARP nf_tables support"
+ help
+ This option enables the ARP support for nf_tables.
+
+config NF_NAT_IPV4
+ tristate "IPv4 NAT"
+ depends on NF_CONNTRACK_IPV4
+ default m if NETFILTER_ADVANCED=n
+ select NF_NAT
+ help
+ The IPv4 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. This can be
+ controlled by iptables or nft.
+
+if NF_NAT_IPV4
+
config NFT_CHAIN_NAT_IPV4
depends on NF_TABLES_IPV4
- depends on NF_NAT_IPV4 && NFT_NAT
tristate "IPv4 nf_tables nat chain support"
help
This option enables the "nat" chain for IPv4 in nf_tables. This
@@ -61,16 +98,54 @@ config NFT_CHAIN_NAT_IPV4
packet transformations such as the source, destination address and
source and destination ports.
-config NFT_REJECT_IPV4
+config NF_NAT_MASQUERADE_IPV4
+ tristate "IPv4 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection).
+
+config NFT_MASQ_IPV4
+ tristate "IPv4 masquerading support for nf_tables"
depends on NF_TABLES_IPV4
- default NFT_REJECT
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV4
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
+config NF_NAT_SNMP_BASIC
+ tristate "Basic SNMP-ALG support"
+ depends on NF_CONNTRACK_SNMP
+ depends on NETFILTER_ADVANCED
+ default NF_NAT && NF_CONNTRACK_SNMP
+ ---help---
+
+ This module implements an Application Layer Gateway (ALG) for
+ SNMP payloads. In conjunction with NAT, it allows a network
+ management system to access multiple private networks with
+ conflicting addresses. It works by modifying IP addresses
+ inside SNMP payloads to match IP-layer NAT mapping.
+
+ This is the "basic" form of SNMP-ALG, as described in RFC 2962
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_NAT_PROTO_GRE
tristate
+ depends on NF_CT_PROTO_GRE
-config NF_TABLES_ARP
- depends on NF_TABLES
- tristate "ARP nf_tables support"
- help
- This option enables the ARP support for nf_tables.
+config NF_NAT_PPTP
+ tristate
+ depends on NF_CONNTRACK
+ default NF_CONNTRACK_PPTP
+ select NF_NAT_PROTO_GRE
+
+config NF_NAT_H323
+ tristate
+ depends on NF_CONNTRACK
+ default NF_CONNTRACK_H323
+
+endif # NF_NAT_IPV4
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
@@ -138,6 +213,7 @@ config IP_NF_FILTER
config IP_NF_TARGET_REJECT
tristate "REJECT target support"
depends on IP_NF_FILTER
+ select NF_REJECT_IPV4
default m if NETFILTER_ADVANCED=n
help
The REJECT target allows a filtering rule to specify that an ICMP
@@ -159,42 +235,26 @@ config IP_NF_TARGET_SYNPROXY
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_ULOG
- tristate "ULOG target support (obsolete)"
- default m if NETFILTER_ADVANCED=n
- ---help---
-
- This option enables the old IPv4-only "ipt_ULOG" implementation
- which has been obsoleted by the new "nfnetlink_log" code (see
- CONFIG_NETFILTER_NETLINK_LOG).
-
- This option adds a `ULOG' target, which allows you to create rules in
- any iptables table. The packet is passed to a userspace logging
- daemon using netlink multicast sockets; unlike the LOG target
- which can only be viewed through syslog.
-
- The appropriate userspace logging daemon (ulogd) may be obtained from
- <http://www.netfilter.org/projects/ulogd/index.html>
-
- To compile it as a module, choose M here. If unsure, say N.
-
# NAT + specific targets: nf_conntrack
-config NF_NAT_IPV4
- tristate "IPv4 NAT"
+config IP_NF_NAT
+ tristate "iptables NAT support"
depends on NF_CONNTRACK_IPV4
default m if NETFILTER_ADVANCED=n
select NF_NAT
+ select NF_NAT_IPV4
+ select NETFILTER_XT_NAT
help
- The IPv4 NAT option allows masquerading, port forwarding and other
- forms of full Network Address Port Translation. It is controlled by
- the `nat' table in iptables: see the man page for iptables(8).
+ This enables the `nat' table in iptables. This allows masquerading,
+ port forwarding and other forms of full Network Address Port
+ Translation.
To compile it as a module, choose M here. If unsure, say N.
-if NF_NAT_IPV4
+if IP_NF_NAT
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
+ select NF_NAT_MASQUERADE_IPV4
default m if NETFILTER_ADVANCED=n
help
Masquerading is a special case of NAT: all outgoing connections are
@@ -223,47 +283,7 @@ config IP_NF_TARGET_REDIRECT
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_REDIRECT.
-endif
-
-config NF_NAT_SNMP_BASIC
- tristate "Basic SNMP-ALG support"
- depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4
- depends on NETFILTER_ADVANCED
- default NF_NAT && NF_CONNTRACK_SNMP
- ---help---
-
- This module implements an Application Layer Gateway (ALG) for
- SNMP payloads. In conjunction with NAT, it allows a network
- management system to access multiple private networks with
- conflicting addresses. It works by modifying IP addresses
- inside SNMP payloads to match IP-layer NAT mapping.
-
- This is the "basic" form of SNMP-ALG, as described in RFC 2962
-
- To compile it as a module, choose M here. If unsure, say N.
-
-# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
-# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker.
-# From kconfig-language.txt:
-#
-# <expr> '&&' <expr> (6)
-#
-# (6) Returns the result of min(/expr/, /expr/).
-
-config NF_NAT_PROTO_GRE
- tristate
- depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE
-
-config NF_NAT_PPTP
- tristate
- depends on NF_CONNTRACK && NF_NAT_IPV4
- default NF_NAT_IPV4 && NF_CONNTRACK_PPTP
- select NF_NAT_PROTO_GRE
-
-config NF_NAT_H323
- tristate
- depends on NF_CONNTRACK && NF_NAT_IPV4
- default NF_NAT_IPV4 && NF_CONNTRACK_H323
+endif # IP_NF_NAT
# mangle + specific targets
config IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 90b82405331e..f4cef5af0969 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -19,10 +19,18 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
+# logging
+obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
+obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
+
+# reject
+obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
+
# NAT helpers (nf_conntrack)
obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
+obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
@@ -31,6 +39,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
@@ -39,7 +48,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
@@ -53,7 +62,6 @@ obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
-obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2510c02c2d21..e90f83a3415b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -285,7 +285,7 @@ clusterip_hashfn(const struct sk_buff *skb,
}
/* node numbers are 1..n, not 0..n */
- return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
+ return reciprocal_scale(hashval, config->num_total_nodes) + 1;
}
static inline int
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 00352ce0f0de..da7f02a0b868 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -22,6 +22,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -46,103 +47,17 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
static unsigned int
masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- struct nf_conn *ct;
- struct nf_conn_nat *nat;
- enum ip_conntrack_info ctinfo;
- struct nf_nat_range newrange;
+ struct nf_nat_range range;
const struct nf_nat_ipv4_multi_range_compat *mr;
- const struct rtable *rt;
- __be32 newsrc, nh;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
-
- ct = nf_ct_get(skb, &ctinfo);
- nat = nfct_nat(ct);
-
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
-
- /* Source address is 0.0.0.0 - locally generated packet that is
- * probably not supposed to be masqueraded.
- */
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
- return NF_ACCEPT;
mr = par->targinfo;
- rt = skb_rtable(skb);
- nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
- newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE);
- if (!newsrc) {
- pr_info("%s ate my IP address\n", par->out->name);
- return NF_DROP;
- }
-
- nat->masq_index = par->out->ifindex;
-
- /* Transfer from original range. */
- memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
- memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
- newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
- newrange.min_addr.ip = newsrc;
- newrange.max_addr.ip = newsrc;
- newrange.min_proto = mr->range[0].min;
- newrange.max_proto = mr->range[0].max;
+ range.flags = mr->range[0].flags;
+ range.min_proto = mr->range[0].min;
+ range.max_proto = mr->range[0].max;
- /* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+ return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out);
}
-static int
-device_cmp(struct nf_conn *i, void *ifindex)
-{
- const struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
- if (nf_ct_l3num(i) != NFPROTO_IPV4)
- return 0;
- return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
- unsigned long event,
- void *ptr)
-{
- const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net *net = dev_net(dev);
-
- if (event == NETDEV_DOWN) {
- /* Device was downed. Search entire table for
- conntracks which were associated with that device,
- and forget them. */
- NF_CT_ASSERT(dev->ifindex != 0);
-
- nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
- }
-
- return NOTIFY_DONE;
-}
-
-static int masq_inet_event(struct notifier_block *this,
- unsigned long event,
- void *ptr)
-{
- struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
- struct netdev_notifier_info info;
-
- netdev_notifier_info_init(&info, dev);
- return masq_device_event(this, event, &info);
-}
-
-static struct notifier_block masq_dev_notifier = {
- .notifier_call = masq_device_event,
-};
-
-static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
-};
-
static struct xt_target masquerade_tg_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV4,
@@ -160,12 +75,8 @@ static int __init masquerade_tg_init(void)
ret = xt_register_target(&masquerade_tg_reg);
- if (ret == 0) {
- /* Register for device down reports */
- register_netdevice_notifier(&masq_dev_notifier);
- /* Register IP address change reports */
- register_inetaddr_notifier(&masq_inet_notifier);
- }
+ if (ret == 0)
+ nf_nat_masquerade_ipv4_register_notifier();
return ret;
}
@@ -173,8 +84,7 @@ static int __init masquerade_tg_init(void)
static void __exit masquerade_tg_exit(void)
{
xt_unregister_target(&masquerade_tg_reg);
- unregister_netdevice_notifier(&masq_dev_notifier);
- unregister_inetaddr_notifier(&masq_inet_notifier);
+ nf_nat_masquerade_ipv4_unregister_notifier();
}
module_init(masquerade_tg_init);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 5b6e0df4ccff..8f48f5517e33 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -20,7 +20,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_REJECT.h>
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include <linux/netfilter_bridge.h>
#endif
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
deleted file mode 100644
index 9cb993cd224b..000000000000
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ /dev/null
@@ -1,498 +0,0 @@
-/*
- * netfilter module for userspace packet logging daemons
- *
- * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This module accepts two parameters:
- *
- * nlbufsiz:
- * The parameter specifies how big the buffer for each netlink multicast
- * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
- * get accumulated in the kernel until they are sent to userspace. It is
- * NOT possible to allocate more than 128kB, and it is strongly discouraged,
- * because atomically allocating 128kB inside the network rx softirq is not
- * reliable. Please also keep in mind that this buffer size is allocated for
- * each nlgroup you are using, so the total kernel memory usage increases
- * by that factor.
- *
- * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since
- * nlbufsiz is used with alloc_skb, which adds another
- * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead.
- *
- * flushtimeout:
- * Specify, after how many hundredths of a second the queue should be
- * flushed even if it is not full yet.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/skbuff.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <net/netlink.h>
-#include <linux/netdevice.h>
-#include <linux/mm.h>
-#include <linux/moduleparam.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_ULOG.h>
-#include <net/netfilter/nf_log.h>
-#include <net/netns/generic.h>
-#include <net/sock.h>
-#include <linux/bitops.h>
-#include <asm/unaligned.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
-
-#define ULOG_NL_EVENT 111 /* Harald's favorite number */
-#define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */
-
-static unsigned int nlbufsiz = NLMSG_GOODSIZE;
-module_param(nlbufsiz, uint, 0400);
-MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
-
-static unsigned int flushtimeout = 10;
-module_param(flushtimeout, uint, 0600);
-MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
-
-static bool nflog = true;
-module_param(nflog, bool, 0400);
-MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
-
-/* global data structures */
-
-typedef struct {
- unsigned int qlen; /* number of nlmsgs' in the skb */
- struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */
- struct sk_buff *skb; /* the pre-allocated skb */
- struct timer_list timer; /* the timer function */
-} ulog_buff_t;
-
-static int ulog_net_id __read_mostly;
-struct ulog_net {
- unsigned int nlgroup[ULOG_MAXNLGROUPS];
- ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
- struct sock *nflognl;
- spinlock_t lock;
-};
-
-static struct ulog_net *ulog_pernet(struct net *net)
-{
- return net_generic(net, ulog_net_id);
-}
-
-/* send one ulog_buff_t to userspace */
-static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
-{
- ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
-
- pr_debug("ulog_send: timer is deleting\n");
- del_timer(&ub->timer);
-
- if (!ub->skb) {
- pr_debug("ulog_send: nothing to send\n");
- return;
- }
-
- /* last nlmsg needs NLMSG_DONE */
- if (ub->qlen > 1)
- ub->lastnlh->nlmsg_type = NLMSG_DONE;
-
- NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
- pr_debug("throwing %d packets to netlink group %u\n",
- ub->qlen, nlgroupnum + 1);
- netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
- GFP_ATOMIC);
-
- ub->qlen = 0;
- ub->skb = NULL;
- ub->lastnlh = NULL;
-}
-
-
-/* timer function to flush queue in flushtimeout time */
-static void ulog_timer(unsigned long data)
-{
- unsigned int groupnum = *((unsigned int *)data);
- struct ulog_net *ulog = container_of((void *)data,
- struct ulog_net,
- nlgroup[groupnum]);
- pr_debug("timer function called, calling ulog_send\n");
-
- /* lock to protect against somebody modifying our structure
- * from ipt_ulog_target at the same time */
- spin_lock_bh(&ulog->lock);
- ulog_send(ulog, groupnum);
- spin_unlock_bh(&ulog->lock);
-}
-
-static struct sk_buff *ulog_alloc_skb(unsigned int size)
-{
- struct sk_buff *skb;
- unsigned int n;
-
- /* alloc skb which should be big enough for a whole
- * multipart message. WARNING: has to be <= 131000
- * due to slab allocator restrictions */
-
- n = max(size, nlbufsiz);
- skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
- if (!skb) {
- if (n > size) {
- /* try to allocate only as much as we need for
- * current packet */
-
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb)
- pr_debug("cannot even allocate %ub\n", size);
- }
- }
-
- return skb;
-}
-
-static void ipt_ulog_packet(struct net *net,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct ipt_ulog_info *loginfo,
- const char *prefix)
-{
- ulog_buff_t *ub;
- ulog_packet_msg_t *pm;
- size_t size, copy_len;
- struct nlmsghdr *nlh;
- struct timeval tv;
- struct ulog_net *ulog = ulog_pernet(net);
-
- /* ffs == find first bit set, necessary because userspace
- * is already shifting groupnumber, but we need unshifted.
- * ffs() returns [1..32], we need [0..31] */
- unsigned int groupnum = ffs(loginfo->nl_group) - 1;
-
- /* calculate the size of the skb needed */
- if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len)
- copy_len = skb->len;
- else
- copy_len = loginfo->copy_range;
-
- size = nlmsg_total_size(sizeof(*pm) + copy_len);
-
- ub = &ulog->ulog_buffers[groupnum];
-
- spin_lock_bh(&ulog->lock);
-
- if (!ub->skb) {
- if (!(ub->skb = ulog_alloc_skb(size)))
- goto alloc_failure;
- } else if (ub->qlen >= loginfo->qthreshold ||
- size > skb_tailroom(ub->skb)) {
- /* either the queue len is too high or we don't have
- * enough room in nlskb left. send it to userspace. */
-
- ulog_send(ulog, groupnum);
-
- if (!(ub->skb = ulog_alloc_skb(size)))
- goto alloc_failure;
- }
-
- pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);
-
- nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
- sizeof(*pm)+copy_len, 0);
- if (!nlh) {
- pr_debug("error during nlmsg_put\n");
- goto out_unlock;
- }
- ub->qlen++;
-
- pm = nlmsg_data(nlh);
- memset(pm, 0, sizeof(*pm));
-
- /* We might not have a timestamp, get one */
- if (skb->tstamp.tv64 == 0)
- __net_timestamp((struct sk_buff *)skb);
-
- /* copy hook, prefix, timestamp, payload, etc. */
- pm->data_len = copy_len;
- tv = ktime_to_timeval(skb->tstamp);
- put_unaligned(tv.tv_sec, &pm->timestamp_sec);
- put_unaligned(tv.tv_usec, &pm->timestamp_usec);
- put_unaligned(skb->mark, &pm->mark);
- pm->hook = hooknum;
- if (prefix != NULL) {
- strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
- pm->prefix[sizeof(pm->prefix) - 1] = '\0';
- }
- else if (loginfo->prefix[0] != '\0')
- strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
-
- if (in && in->hard_header_len > 0 &&
- skb->mac_header != skb->network_header &&
- in->hard_header_len <= ULOG_MAC_LEN) {
- memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
- pm->mac_len = in->hard_header_len;
- } else
- pm->mac_len = 0;
-
- if (in)
- strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
-
- if (out)
- strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
-
- /* copy_len <= skb->len, so can't fail. */
- if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
- BUG();
-
- /* check if we are building multi-part messages */
- if (ub->qlen > 1)
- ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
-
- ub->lastnlh = nlh;
-
- /* if timer isn't already running, start it */
- if (!timer_pending(&ub->timer)) {
- ub->timer.expires = jiffies + flushtimeout * HZ / 100;
- add_timer(&ub->timer);
- }
-
- /* if threshold is reached, send message to userspace */
- if (ub->qlen >= loginfo->qthreshold) {
- if (loginfo->qthreshold > 1)
- nlh->nlmsg_type = NLMSG_DONE;
- ulog_send(ulog, groupnum);
- }
-out_unlock:
- spin_unlock_bh(&ulog->lock);
-
- return;
-
-alloc_failure:
- pr_debug("Error building netlink message\n");
- spin_unlock_bh(&ulog->lock);
-}
-
-static unsigned int
-ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- struct net *net = dev_net(par->in ? par->in : par->out);
-
- ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
- par->targinfo, NULL);
- return XT_CONTINUE;
-}
-
-static void ipt_logfn(struct net *net,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *li,
- const char *prefix)
-{
- struct ipt_ulog_info loginfo;
-
- if (!li || li->type != NF_LOG_TYPE_ULOG) {
- loginfo.nl_group = ULOG_DEFAULT_NLGROUP;
- loginfo.copy_range = 0;
- loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD;
- loginfo.prefix[0] = '\0';
- } else {
- loginfo.nl_group = li->u.ulog.group;
- loginfo.copy_range = li->u.ulog.copy_len;
- loginfo.qthreshold = li->u.ulog.qthreshold;
- strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
- }
-
- ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
-}
-
-static int ulog_tg_check(const struct xt_tgchk_param *par)
-{
- const struct ipt_ulog_info *loginfo = par->targinfo;
-
- if (!par->net->xt.ulog_warn_deprecated) {
- pr_info("ULOG is deprecated and it will be removed soon, "
- "use NFLOG instead\n");
- par->net->xt.ulog_warn_deprecated = true;
- }
-
- if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
- pr_debug("prefix not null-terminated\n");
- return -EINVAL;
- }
- if (loginfo->qthreshold > ULOG_MAX_QLEN) {
- pr_debug("queue threshold %Zu > MAX_QLEN\n",
- loginfo->qthreshold);
- return -EINVAL;
- }
- return 0;
-}
-
-#ifdef CONFIG_COMPAT
-struct compat_ipt_ulog_info {
- compat_uint_t nl_group;
- compat_size_t copy_range;
- compat_size_t qthreshold;
- char prefix[ULOG_PREFIX_LEN];
-};
-
-static void ulog_tg_compat_from_user(void *dst, const void *src)
-{
- const struct compat_ipt_ulog_info *cl = src;
- struct ipt_ulog_info l = {
- .nl_group = cl->nl_group,
- .copy_range = cl->copy_range,
- .qthreshold = cl->qthreshold,
- };
-
- memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
- memcpy(dst, &l, sizeof(l));
-}
-
-static int ulog_tg_compat_to_user(void __user *dst, const void *src)
-{
- const struct ipt_ulog_info *l = src;
- struct compat_ipt_ulog_info cl = {
- .nl_group = l->nl_group,
- .copy_range = l->copy_range,
- .qthreshold = l->qthreshold,
- };
-
- memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
- return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
-}
-#endif /* CONFIG_COMPAT */
-
-static struct xt_target ulog_tg_reg __read_mostly = {
- .name = "ULOG",
- .family = NFPROTO_IPV4,
- .target = ulog_tg,
- .targetsize = sizeof(struct ipt_ulog_info),
- .checkentry = ulog_tg_check,
-#ifdef CONFIG_COMPAT
- .compatsize = sizeof(struct compat_ipt_ulog_info),
- .compat_from_user = ulog_tg_compat_from_user,
- .compat_to_user = ulog_tg_compat_to_user,
-#endif
- .me = THIS_MODULE,
-};
-
-static struct nf_logger ipt_ulog_logger __read_mostly = {
- .name = "ipt_ULOG",
- .logfn = ipt_logfn,
- .me = THIS_MODULE,
-};
-
-static int __net_init ulog_tg_net_init(struct net *net)
-{
- int i;
- struct ulog_net *ulog = ulog_pernet(net);
- struct netlink_kernel_cfg cfg = {
- .groups = ULOG_MAXNLGROUPS,
- };
-
- spin_lock_init(&ulog->lock);
- /* initialize ulog_buffers */
- for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- ulog->nlgroup[i] = i;
- setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
- (unsigned long)&ulog->nlgroup[i]);
- }
-
- ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
- if (!ulog->nflognl)
- return -ENOMEM;
-
- if (nflog)
- nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
-
- return 0;
-}
-
-static void __net_exit ulog_tg_net_exit(struct net *net)
-{
- ulog_buff_t *ub;
- int i;
- struct ulog_net *ulog = ulog_pernet(net);
-
- if (nflog)
- nf_log_unset(net, &ipt_ulog_logger);
-
- netlink_kernel_release(ulog->nflognl);
-
- /* remove pending timers and free allocated skb's */
- for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- ub = &ulog->ulog_buffers[i];
- pr_debug("timer is deleting\n");
- del_timer(&ub->timer);
-
- if (ub->skb) {
- kfree_skb(ub->skb);
- ub->skb = NULL;
- }
- }
-}
-
-static struct pernet_operations ulog_tg_net_ops = {
- .init = ulog_tg_net_init,
- .exit = ulog_tg_net_exit,
- .id = &ulog_net_id,
- .size = sizeof(struct ulog_net),
-};
-
-static int __init ulog_tg_init(void)
-{
- int ret;
- pr_debug("init module\n");
-
- if (nlbufsiz > 128*1024) {
- pr_warn("Netlink buffer has to be <= 128kB\n");
- return -EINVAL;
- }
-
- ret = register_pernet_subsys(&ulog_tg_net_ops);
- if (ret)
- goto out_pernet;
-
- ret = xt_register_target(&ulog_tg_reg);
- if (ret < 0)
- goto out_target;
-
- if (nflog)
- nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
-
- return 0;
-
-out_target:
- unregister_pernet_subsys(&ulog_tg_net_ops);
-out_pernet:
- return ret;
-}
-
-static void __exit ulog_tg_exit(void)
-{
- pr_debug("cleanup_module\n");
- if (nflog)
- nf_log_unregister(&ipt_ulog_logger);
- xt_unregister_target(&ulog_tg_reg);
- unregister_pernet_subsys(&ulog_tg_net_ops);
-}
-
-module_init(ulog_tg_init);
-module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index f1787c04a4dd..6b67d7e9a75d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -28,222 +28,57 @@ static const struct xt_table nf_nat_ipv4_table = {
.af = NFPROTO_IPV4,
};
-static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- */
- struct nf_nat_range range;
-
- range.flags = 0;
- pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
- HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
-
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
-static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct nf_conn *ct)
+static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
- unsigned int ret;
- ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
- if (ret == NF_ACCEPT) {
- if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
- ret = alloc_null_binding(ct, hooknum);
- }
- return ret;
+ return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.nat_table);
}
-static unsigned int
-nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- struct nf_conn_nat *nat;
- /* maniptype == SRC for postrouting. */
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
-
- /* We never see fragments: conntrack defrags on pre-routing
- * and local-out, and nf_nat_out protects post-routing.
- */
- NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
- ct = nf_ct_get(skb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
- * have dropped it. Hence it's the user's responsibilty to
- * packet filter it out, or implement conntrack/NAT for that
- * protocol. 8) --RR
- */
- if (!ct)
- return NF_ACCEPT;
-
- /* Don't try to NAT if this packet is not conntracked */
- if (nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- ops->hooknum))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- unsigned int ret;
-
- ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
- if (ret != NF_ACCEPT)
- return ret;
- } else {
- pr_debug("Already setup manip %s for ct %p\n",
- maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
- ct);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
- goto oif_changed;
- }
- break;
-
- default:
- /* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
- goto oif_changed;
- }
-
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
-
-oif_changed:
- nf_ct_kill_acct(ct, ctinfo, skb);
- return NF_DROP;
+ return nf_nat_ipv4_fn(ops, skb, in, out, iptable_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv4_in(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- unsigned int ret;
- __be32 daddr = ip_hdr(skb)->daddr;
-
- ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != ip_hdr(skb)->daddr)
- skb_dst_drop(skb);
-
- return ret;
+ return nf_nat_ipv4_in(ops, skb, in, out, iptable_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv4_out(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
-#ifdef CONFIG_XFRM
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- int err;
-#endif
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if ((ct->tuplehash[dir].tuple.src.u3.ip !=
- ct->tuplehash[!dir].tuple.dst.u3.ip) ||
- (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
- ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all)) {
- err = nf_xfrm_me_harder(skb, AF_INET);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
- }
-#endif
- return ret;
+ return nf_nat_ipv4_out(ops, skb, in, out, iptable_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
- int err;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.dst.u3.ip !=
- ct->tuplehash[!dir].tuple.src.u3.ip) {
- err = ip_route_me_harder(skb, RTN_UNSPEC);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#ifdef CONFIG_XFRM
- else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
- ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
- ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all) {
- err = nf_xfrm_me_harder(skb, AF_INET);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#endif
- }
- return ret;
+ return nf_nat_ipv4_local_fn(ops, skb, in, out, iptable_nat_do_chain);
}
static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
- .hook = nf_nat_ipv4_in,
+ .hook = iptable_nat_ipv4_in,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
@@ -251,7 +86,7 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv4_out,
+ .hook = iptable_nat_ipv4_out,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
@@ -259,7 +94,7 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
},
/* Before packet filtering, change destination */
{
- .hook = nf_nat_ipv4_local_fn,
+ .hook = iptable_nat_ipv4_local_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
@@ -267,7 +102,7 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv4_fn,
+ .hook = iptable_nat_ipv4_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8127dc802865..a054fe083431 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -314,7 +314,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -ENOENT;
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -358,7 +358,7 @@ static struct nf_sockopt_ops so_getorigdst = {
.pf = PF_INET,
.get_optmin = SO_ORIGINAL_DST,
.get_optmax = SO_ORIGINAL_DST+1,
- .get = &getorigdst,
+ .get = getorigdst,
.owner = THIS_MODULE,
};
@@ -388,7 +388,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.invert_tuple = ipv4_invert_tuple,
.print_tuple = ipv4_print_tuple,
.get_l4proto = ipv4_get_l4proto,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_tuple_size = ipv4_nlattr_tuple_size,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index a338dad41b7d..b91b2641adda 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -226,7 +226,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -408,7 +408,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
.error = icmp_error,
.destroy = NULL,
.me = NULL,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_tuple_size = icmp_nlattr_tuple_size,
.nlattr_to_tuple = icmp_nlattr_to_tuple,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index b8f6381c7d0b..7e5ca6f2d0cd 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -17,7 +17,7 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
#include <net/netfilter/nf_conntrack_zones.h>
@@ -45,12 +45,12 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
{
u16 zone = NF_CT_DEFAULT_ZONE;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge &&
skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
@@ -74,8 +74,8 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
inet->nodefrag)
return NF_ACCEPT;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#if !IS_ENABLED(CONFIG_NF_NAT)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
new file mode 100644
index 000000000000..ccfc78db12ee
--- /dev/null
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -0,0 +1,149 @@
+/*
+ * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Based on code from ebt_log from:
+ *
+ * Bart De Schuymer <bdschuym@pandora.be>
+ * Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 5,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+struct arppayload {
+ unsigned char mac_src[ETH_ALEN];
+ unsigned char ip_src[4];
+ unsigned char mac_dst[ETH_ALEN];
+ unsigned char ip_dst[4];
+};
+
+static void dump_arp_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int nhoff)
+{
+ const struct arphdr *ah;
+ struct arphdr _arph;
+ const struct arppayload *ap;
+ struct arppayload _arpp;
+
+ ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ if (ah == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+ nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
+
+ /* If it's for Ethernet and the lengths are OK, then log the ARP
+ * payload.
+ */
+ if (ah->ar_hrd != htons(1) ||
+ ah->ar_hln != ETH_ALEN ||
+ ah->ar_pln != sizeof(__be32))
+ return;
+
+ ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
+ if (ap == NULL) {
+ nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]",
+ skb->len - sizeof(_arph));
+ return;
+ }
+ nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4",
+ ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+}
+
+void nf_log_arp_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix);
+ dump_arp_packet(m, loginfo, skb, 0);
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_arp_logger __read_mostly = {
+ .name = "nf_log_arp",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_arp_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_arp_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_arp_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_arp_logger);
+}
+
+static struct pernet_operations nf_log_arp_net_ops = {
+ .init = nf_log_arp_net_init,
+ .exit = nf_log_arp_net_exit,
+};
+
+static int __init nf_log_arp_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_log_arp_net_ops);
+ if (ret < 0)
+ return ret;
+
+ nf_log_register(NFPROTO_ARP, &nf_arp_logger);
+ return 0;
+}
+
+static void __exit nf_log_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_arp_net_ops);
+ nf_log_unregister(&nf_arp_logger);
+}
+
+module_init(nf_log_arp_init);
+module_exit(nf_log_arp_exit);
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter ARP packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(3, 0);
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
new file mode 100644
index 000000000000..078bdca1b607
--- /dev/null
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -0,0 +1,385 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 5,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* One level of recursion won't kill us */
+static void dump_ipv4_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int iphoff)
+{
+ struct iphdr _iph;
+ const struct iphdr *ih;
+ unsigned int logflags;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_MASK;
+
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (ih == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+ nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr);
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+ nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(ih->frag_off) & IP_CE)
+ nf_log_buf_add(m, "CE ");
+ if (ntohs(ih->frag_off) & IP_DF)
+ nf_log_buf_add(m, "DF ");
+ if (ntohs(ih->frag_off) & IP_MF)
+ nf_log_buf_add(m, "MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+ if ((logflags & XT_LOG_IPOPT) &&
+ ih->ihl * 4 > sizeof(struct iphdr)) {
+ const unsigned char *op;
+ unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
+ unsigned int i, optsize;
+
+ optsize = ih->ihl * 4 - sizeof(struct iphdr);
+ op = skb_header_pointer(skb, iphoff+sizeof(_iph),
+ optsize, _opt);
+ if (op == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ nf_log_buf_add(m, "OPT (");
+ for (i = 0; i < optsize; i++)
+ nf_log_buf_add(m, "%02X", op[i]);
+ nf_log_buf_add(m, ") ");
+ }
+
+ switch (ih->protocol) {
+ case IPPROTO_TCP:
+ if (nf_log_dump_tcp_header(m, skb, ih->protocol,
+ ntohs(ih->frag_off) & IP_OFFSET,
+ iphoff+ih->ihl*4, logflags))
+ return;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ if (nf_log_dump_udp_header(m, skb, ih->protocol,
+ ntohs(ih->frag_off) & IP_OFFSET,
+ iphoff+ih->ihl*4))
+ return;
+ break;
+ case IPPROTO_ICMP: {
+ struct icmphdr _icmph;
+ const struct icmphdr *ich;
+ static const size_t required_len[NR_ICMP_TYPES+1]
+ = { [ICMP_ECHOREPLY] = 4,
+ [ICMP_DEST_UNREACH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_SOURCE_QUENCH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_REDIRECT]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_ECHO] = 4,
+ [ICMP_TIME_EXCEEDED]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_PARAMETERPROB]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_TIMESTAMP] = 20,
+ [ICMP_TIMESTAMPREPLY] = 20,
+ [ICMP_ADDRESS] = 12,
+ [ICMP_ADDRESSREPLY] = 12 };
+
+ /* Max length: 11 "PROTO=ICMP " */
+ nf_log_buf_add(m, "PROTO=ICMP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_icmph), &_icmph);
+ if (ich == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (ich->type <= NR_ICMP_TYPES &&
+ required_len[ich->type] &&
+ skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ switch (ich->type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ nf_log_buf_add(m, "ID=%u SEQ=%u ",
+ ntohs(ich->un.echo.id),
+ ntohs(ich->un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+ nf_log_buf_add(m, "PARAMETER=%u ",
+ ntohl(ich->un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
+ /* Fall through */
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (!iphoff) { /* Only recurse once. */
+ nf_log_buf_add(m, "[");
+ dump_ipv4_packet(m, info, skb,
+ iphoff + ih->ihl*4+sizeof(_icmph));
+ nf_log_buf_add(m, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ich->type == ICMP_DEST_UNREACH &&
+ ich->code == ICMP_FRAG_NEEDED) {
+ nf_log_buf_add(m, "MTU=%u ",
+ ntohs(ich->un.frag.mtu));
+ }
+ }
+ break;
+ }
+ /* Max Length */
+ case IPPROTO_AH: {
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 9 "PROTO=AH " */
+ nf_log_buf_add(m, "PROTO=AH ");
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+ break;
+ }
+ case IPPROTO_ESP: {
+ struct ip_esp_hdr _esph;
+ const struct ip_esp_hdr *eh;
+
+ /* Max length: 10 "PROTO=ESP " */
+ nf_log_buf_add(m, "PROTO=ESP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_esph), &_esph);
+ if (eh == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi));
+ break;
+ }
+ /* Max length: 10 "PROTO 255 " */
+ default:
+ nf_log_buf_add(m, "PROTO=%u ", ih->protocol);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & XT_LOG_UID) && !iphoff)
+ nf_log_dump_sk_uid_gid(m, skb->sk);
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (!iphoff && skb->mark)
+ nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+
+ /* Proto Max log string length */
+ /* IP: 40+46+6+11+127 = 230 */
+ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
+ /* UDP: 10+max(25,20) = 35 */
+ /* UDPLITE: 14+max(25,20) = 39 */
+ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+ /* ESP: 10+max(25)+15 = 50 */
+ /* AH: 9+max(25)+15 = 49 */
+ /* unknown: 10 */
+
+ /* (ICMP allows recursion one level deep) */
+ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
+ /* maxlen = 230+ 91 + 230 + 252 = 803 */
+}
+
+static void dump_ipv4_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ unsigned int logflags = 0;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+
+ if (!(logflags & XT_LOG_MACDECODE))
+ goto fallback;
+
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ ntohs(eth_hdr(skb)->h_proto));
+ return;
+ default:
+ break;
+ }
+
+fallback:
+ nf_log_buf_add(m, "MAC=");
+ if (dev->hard_header_len &&
+ skb->mac_header != skb->network_header) {
+ const unsigned char *p = skb_mac_header(skb);
+ unsigned int i;
+
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < dev->hard_header_len; i++, p++)
+ nf_log_buf_add(m, ":%02x", *p);
+ }
+ nf_log_buf_add(m, " ");
+}
+
+static void nf_log_ip_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in,
+ out, loginfo, prefix);
+
+ if (in != NULL)
+ dump_ipv4_mac_header(m, loginfo, skb);
+
+ dump_ipv4_packet(m, loginfo, skb, 0);
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip_logger __read_mostly = {
+ .name = "nf_log_ipv4",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_ip_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_ipv4_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_ipv4_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_ip_logger);
+}
+
+static struct pernet_operations nf_log_ipv4_net_ops = {
+ .init = nf_log_ipv4_net_init,
+ .exit = nf_log_ipv4_net_exit,
+};
+
+static int __init nf_log_ipv4_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_log_ipv4_net_ops);
+ if (ret < 0)
+ return ret;
+
+ nf_log_register(NFPROTO_IPV4, &nf_ip_logger);
+ return 0;
+}
+
+static void __exit nf_log_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_ipv4_net_ops);
+ nf_log_unregister(&nf_ip_logger);
+}
+
+module_init(nf_log_ipv4_init);
+module_exit(nf_log_ipv4_exit);
+
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter IPv4 packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index d8b2e14efddc..fc37711e11f3 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -154,6 +154,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
htons(oldlen), htons(datalen), 1);
}
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
{
@@ -169,6 +170,7 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
return 0;
}
+#endif
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
.l3proto = NFPROTO_IPV4,
@@ -177,7 +179,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
.manip_pkt = nf_nat_ipv4_manip_pkt,
.csum_update = nf_nat_ipv4_csum_update,
.csum_recalc = nf_nat_ipv4_csum_recalc,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_ipv4_nlattr_to_range,
+#endif
#ifdef CONFIG_XFRM
.decode_session = nf_nat_ipv4_decode_session,
#endif
@@ -250,6 +254,205 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+unsigned int
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+
+ /* We never see fragments: conntrack defrags on pre-routing
+ * and local-out, and nf_nat_out protects post-routing.
+ */
+ NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = do_chain(ops, skb, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
+ break;
+
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
+
+unsigned int
+nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ unsigned int ret;
+ __be32 daddr = ip_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ daddr != ip_hdr(skb)->daddr)
+ skb_dst_drop(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_in);
+
+unsigned int
+nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if ((ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_out);
+
+unsigned int
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_local_fn);
+
static int __init nf_nat_l3proto_ipv4_init(void)
{
int err;
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
new file mode 100644
index 000000000000..c6eb42100e9a
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -0,0 +1,153 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct nf_nat_range *range,
+ const struct net_device *out)
+{
+ struct nf_conn *ct;
+ struct nf_conn_nat *nat;
+ enum ip_conntrack_info ctinfo;
+ struct nf_nat_range newrange;
+ const struct rtable *rt;
+ __be32 newsrc, nh;
+
+ NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+
+ ct = nf_ct_get(skb, &ctinfo);
+ nat = nfct_nat(ct);
+
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ /* Source address is 0.0.0.0 - locally generated packet that is
+ * probably not supposed to be masqueraded.
+ */
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+ return NF_ACCEPT;
+
+ rt = skb_rtable(skb);
+ nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+ newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
+ if (!newsrc) {
+ pr_info("%s ate my IP address\n", out->name);
+ return NF_DROP;
+ }
+
+ nat->masq_index = out->ifindex;
+
+ /* Transfer from original range. */
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newsrc;
+ newrange.max_addr.ip = newsrc;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
+
+static int device_cmp(struct nf_conn *i, void *ifindex)
+{
+ const struct nf_conn_nat *nat = nfct_nat(i);
+
+ if (!nat)
+ return 0;
+ if (nf_ct_l3num(i) != NFPROTO_IPV4)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN) {
+ /* Device was downed. Search entire table for
+ * conntracks which were associated with that device,
+ * and forget them.
+ */
+ NF_CT_ASSERT(dev->ifindex != 0);
+
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, dev);
+ return masq_device_event(this, event, &info);
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+
+void nf_nat_masquerade_ipv4_register_notifier(void)
+{
+ /* check if the notifier was already set */
+ if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
+ return;
+
+ /* Register for device down reports */
+ register_netdevice_notifier(&masq_dev_notifier);
+ /* Register IP address change reports */
+ register_inetaddr_notifier(&masq_inet_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
+
+void nf_nat_masquerade_ipv4_unregister_notifier(void)
+{
+ /* check if the notifier still has clients */
+ if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
+ return;
+
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 690d890111bb..9414923f1e15 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -124,7 +124,7 @@ static const struct nf_nat_l4proto gre = {
.manip_pkt = gre_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = gre_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index eb303471bcf6..4557b4ab8342 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -77,7 +77,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
.manip_pkt = icmp_manip_pkt,
.in_range = icmp_in_range,
.unique_tuple = icmp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
new file mode 100644
index 000000000000..b023b4eb1a96
--- /dev/null
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -0,0 +1,127 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+#include <linux/netfilter_ipv4.h>
+
+/* Send RST reply */
+void nf_send_reset(struct sk_buff *oldskb, int hook)
+{
+ struct sk_buff *nskb;
+ const struct iphdr *oiph;
+ struct iphdr *niph;
+ const struct tcphdr *oth;
+ struct tcphdr _otcph, *tcph;
+
+ /* IP header checks: fragment. */
+ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
+ return;
+
+ oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
+ sizeof(_otcph), &_otcph);
+ if (oth == NULL)
+ return;
+
+ /* No RST for RST. */
+ if (oth->rst)
+ return;
+
+ if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ return;
+
+ /* Check checksum */
+ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
+ return;
+ oiph = ip_hdr(oldskb);
+
+ nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ LL_MAX_HEADER, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ skb_reserve(nskb, LL_MAX_HEADER);
+
+ skb_reset_network_header(nskb);
+ niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+ niph->version = 4;
+ niph->ihl = sizeof(struct iphdr) / 4;
+ niph->tos = 0;
+ niph->id = 0;
+ niph->frag_off = htons(IP_DF);
+ niph->protocol = IPPROTO_TCP;
+ niph->check = 0;
+ niph->saddr = oiph->daddr;
+ niph->daddr = oiph->saddr;
+
+ skb_reset_transport_header(nskb);
+ tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+ memset(tcph, 0, sizeof(*tcph));
+ tcph->source = oth->dest;
+ tcph->dest = oth->source;
+ tcph->doff = sizeof(struct tcphdr) / 4;
+
+ if (oth->ack)
+ tcph->seq = oth->ack_seq;
+ else {
+ tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+ oldskb->len - ip_hdrlen(oldskb) -
+ (oth->doff << 2));
+ tcph->ack = 1;
+ }
+
+ tcph->rst = 1;
+ tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
+ niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)tcph - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ /* ip_route_me_harder expects skb->dst to be set */
+ skb_dst_set_noref(nskb, skb_dst(oldskb));
+
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
+
+ /* "Never happens" */
+ if (nskb->len > dst_mtu(skb_dst(nskb)))
+ goto free_nskb;
+
+ nf_ct_attach(nskb, oldskb);
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ /* If we use ip_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+ nskb->dev = oldskb->nf_bridge->physindev;
+ niph->tot_len = htons(nskb->len);
+ ip_send_check(niph);
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ goto free_nskb;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip_local_out(nskb);
+
+ return;
+
+ free_nskb:
+ kfree_skb(nskb);
+}
+EXPORT_SYMBOL_GPL(nf_send_reset);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index 3964157d826c..df547bf50078 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -26,136 +26,53 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/ip.h>
-/*
- * NAT chains
- */
-
-static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
struct nft_pktinfo pkt;
- unsigned int ret;
-
- if (ct == NULL || nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED + IP_CT_IS_REPLY:
- if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- ops->hooknum))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall through */
- case IP_CT_NEW:
- if (nf_nat_initialized(ct, maniptype))
- break;
- nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
- ret = nft_do_chain(&pkt, ops);
- if (ret != NF_ACCEPT)
- return ret;
- if (!nf_nat_initialized(ct, maniptype)) {
- ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
- if (ret != NF_ACCEPT)
- return ret;
- }
- default:
- break;
- }
-
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+ return nft_do_chain(&pkt, ops);
}
-static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- __be32 daddr = ip_hdr(skb)->daddr;
- unsigned int ret;
-
- ret = nf_nat_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- ip_hdr(skb)->daddr != daddr) {
- skb_dst_drop(skb);
- }
- return ret;
+ return nf_nat_ipv4_fn(ops, skb, in, out, nft_nat_do_chain);
}
-static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- enum ip_conntrack_info ctinfo __maybe_unused;
- const struct nf_conn *ct __maybe_unused;
- unsigned int ret;
-
- ret = nf_nat_fn(ops, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.src.u3.ip !=
- ct->tuplehash[!dir].tuple.dst.u3.ip ||
- ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all)
- return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
- ret : NF_DROP;
- }
-#endif
- return ret;
+ return nf_nat_ipv4_in(ops, skb, in, out, nft_nat_do_chain);
}
-static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- enum ip_conntrack_info ctinfo;
- const struct nf_conn *ct;
- unsigned int ret;
-
- ret = nf_nat_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ return nf_nat_ipv4_out(ops, skb, in, out, nft_nat_do_chain);
+}
- if (ct->tuplehash[dir].tuple.dst.u3.ip !=
- ct->tuplehash[!dir].tuple.src.u3.ip) {
- if (ip_route_me_harder(skb, RTN_UNSPEC))
- ret = NF_DROP;
- }
-#ifdef CONFIG_XFRM
- else if (ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (nf_xfrm_me_harder(skb, AF_INET))
- ret = NF_DROP;
-#endif
- }
- return ret;
+static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return nf_nat_ipv4_local_fn(ops, skb, in, out, nft_nat_do_chain);
}
static const struct nf_chain_type nft_chain_nat_ipv4 = {
@@ -168,10 +85,10 @@ static const struct nf_chain_type nft_chain_nat_ipv4 = {
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_LOCAL_IN),
.hooks = {
- [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
- [NF_INET_POST_ROUTING] = nf_nat_postrouting,
- [NF_INET_LOCAL_OUT] = nf_nat_output,
- [NF_INET_LOCAL_IN] = nf_nat_fn,
+ [NF_INET_PRE_ROUTING] = nft_nat_ipv4_in,
+ [NF_INET_POST_ROUTING] = nft_nat_ipv4_out,
+ [NF_INET_LOCAL_OUT] = nft_nat_ipv4_local_fn,
+ [NF_INET_LOCAL_IN] = nft_nat_ipv4_fn,
},
};
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
new file mode 100644
index 000000000000..1c636d6b5b50
--- /dev/null
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+
+static void nft_masq_ipv4_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range range;
+ unsigned int verdict;
+
+ range.flags = priv->flags;
+
+ verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
+ &range, pkt->out);
+
+ data[NFT_REG_VERDICT].verdict = verdict;
+}
+
+static struct nft_expr_type nft_masq_ipv4_type;
+static const struct nft_expr_ops nft_masq_ipv4_ops = {
+ .type = &nft_masq_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv4_eval,
+ .init = nft_masq_init,
+ .dump = nft_masq_dump,
+};
+
+static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "masq",
+ .ops = &nft_masq_ipv4_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_masq_ipv4_module_init(void)
+{
+ int ret;
+
+ ret = nft_register_expr(&nft_masq_ipv4_type);
+ if (ret < 0)
+ return ret;
+
+ nf_nat_masquerade_ipv4_register_notifier();
+
+ return ret;
+}
+
+static void __exit nft_masq_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_masq_ipv4_type);
+ nf_nat_masquerade_ipv4_unregister_notifier();
+}
+
+module_init(nft_masq_ipv4_module_init);
+module_exit(nft_masq_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index e79718a382f2..ed33299c56d1 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -16,7 +16,6 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#include <net/icmp.h>
#include <net/netfilter/ipv4/nf_reject.h>
#include <net/netfilter/nft_reject.h>
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 044a0ddf6a79..57f7c9804139 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -311,7 +311,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
chk_addr_ret = RTN_LOCAL;
- if ((sysctl_ip_nonlocal_bind == 0 &&
+ if ((net->ipv4.sysctl_ip_nonlocal_bind == 0 &&
isk->freebind == 0 && isk->transparent == 0 &&
chk_addr_ret != RTN_LOCAL) ||
chk_addr_ret == RTN_MULTICAST ||
@@ -911,7 +911,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sin6->sin6_flowinfo = ip6_flowinfo(ip6);
sin6->sin6_scope_id =
ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
*addr_len = sizeof(*sin6);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae0af9386f7c..8e3eb39f84e7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -52,6 +52,7 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
+ unsigned int frag_mem;
int orphans, sockets;
local_bh_disable();
@@ -71,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- seq_printf(seq, "FRAG: inuse %d memory %d\n",
- ip_frag_nqueues(net), ip_frag_mem(net));
+ frag_mem = ip_frag_mem(net);
+ seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
return 0;
}
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 46d6a1c923a8..4b7c0ec65251 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -30,6 +30,7 @@
const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet_offloads);
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2c65160565e1..739db3100c23 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -58,6 +58,7 @@
#include <linux/in_route.h>
#include <linux/route.h>
#include <linux/skbuff.h>
+#include <linux/igmp.h>
#include <net/net_namespace.h>
#include <net/dst.h>
#include <net/sock.h>
@@ -174,7 +175,9 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
while (sk) {
delivered = 1;
- if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
+ if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
+ ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
+ skb->dev->ifindex)) {
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
/* Not releasing hash table! */
@@ -365,6 +368,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->ip_summed = CHECKSUM_NONE;
+ sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
+
skb->transport_header = skb->network_header;
err = -EFAULT;
if (memcpy_fromiovecend((void *)iph, from, 0, length))
@@ -606,6 +611,8 @@ back_from_confirm:
&rt, msg->msg_flags);
else {
+ sock_tx_timestamp(sk, &ipc.tx_flags);
+
if (!ipc.addr)
ipc.addr = fl4.daddr;
lock_sock(sk);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 190199851c9a..793c0bb8c4fd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -596,12 +596,12 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
static inline u32 fnhe_hashfun(__be32 daddr)
{
+ static u32 fnhe_hashrnd __read_mostly;
u32 hval;
- hval = (__force u32) daddr;
- hval ^= (hval >> 11) ^ (hval >> 22);
-
- return hval & (FNHE_HASH_SIZE - 1);
+ net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
+ hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
+ return hash_32(hval, FNHE_HASH_SHIFT);
}
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
@@ -628,12 +628,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
spin_lock_bh(&fnhe_lock);
- hash = nh->nh_exceptions;
+ hash = rcu_dereference(nh->nh_exceptions);
if (!hash) {
hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
if (!hash)
goto out_unlock;
- nh->nh_exceptions = hash;
+ rcu_assign_pointer(nh->nh_exceptions, hash);
}
hash += hval;
@@ -746,7 +746,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
}
n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
- if (n) {
+ if (!IS_ERR(n)) {
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
@@ -1242,7 +1242,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
- struct fnhe_hash_bucket *hash = nh->nh_exceptions;
+ struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
struct fib_nh_exception *fnhe;
u32 hval;
@@ -1798,8 +1798,6 @@ local_input:
no_route:
RT_CACHE_STAT_INC(in_no_route);
res.type = RTN_UNREACHABLE;
- if (err == -ESRCH)
- err = -ENETUNREACH;
goto local_input;
/*
@@ -2267,9 +2265,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
return rt;
if (flp4->flowi4_proto)
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0);
return rt;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c86624b36a62..0431a8f3c8f4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -25,7 +25,7 @@
extern int sysctl_tcp_syncookies;
-static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
+static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
@@ -170,7 +170,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
}
EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
-__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb,
+ __u16 *mssp)
{
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 79a007c52558..b3c53c8b331e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -286,13 +286,6 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &ip_ttl_max,
},
{
- .procname = "ip_nonlocal_bind",
- .data = &sysctl_ip_nonlocal_bind,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_syn_retries",
.data = &sysctl_tcp_syn_retries,
.maxlen = sizeof(int),
@@ -450,6 +443,16 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+#ifdef CONFIG_IP_MULTICAST
+ {
+ .procname = "igmp_qrv",
+ .data = &sysctl_igmp_qrv,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+#endif
{
.procname = "inet_peer_threshold",
.data = &inet_peer_threshold,
@@ -628,15 +631,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
-#ifdef CONFIG_NET_DMA
- {
- .procname = "tcp_dma_copybreak",
- .data = &sysctl_tcp_dma_copybreak,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
-#endif
{
.procname = "tcp_slow_start_after_idle",
.data = &sysctl_tcp_slow_start_after_idle,
@@ -728,6 +722,22 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &one,
},
{
+ .procname = "icmp_msgs_per_sec",
+ .data = &sysctl_icmp_msgs_per_sec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "icmp_msgs_burst",
+ .data = &sysctl_icmp_msgs_burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
@@ -839,6 +849,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ip_nonlocal_bind",
+ .data = &init_net.ipv4.sysctl_ip_nonlocal_bind,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "fwmark_reflect",
.data = &init_net.ipv4.sysctl_fwmark_reflect,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d2118e5fbc7..86023b9be47f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -274,7 +274,6 @@
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
-#include <net/netdma.h>
#include <net/sock.h>
#include <asm/uaccess.h>
@@ -405,7 +404,7 @@ void tcp_init_sock(struct sock *sk)
tp->reordering = sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
- icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -426,6 +425,17 @@ void tcp_init_sock(struct sock *sk)
}
EXPORT_SYMBOL(tcp_init_sock);
+static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb)
+{
+ if (sk->sk_tsflags) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ sock_tx_timestamp(sk, &shinfo->tx_flags);
+ if (shinfo->tx_flags & SKBTX_ANY_TSTAMP)
+ shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
+ }
+}
+
/*
* Wait for a TCP event.
*
@@ -523,7 +533,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
- if (sk->sk_err)
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR;
return mask;
@@ -598,7 +608,7 @@ static inline bool forced_push(const struct tcp_sock *tp)
return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}
-static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
+static void skb_entail(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -607,7 +617,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
tcb->seq = tcb->end_seq = tp->write_seq;
tcb->tcp_flags = TCPHDR_ACK;
tcb->sacked = 0;
- skb_header_release(skb);
+ __skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
@@ -952,15 +962,17 @@ new_segment:
skb->ip_summed = CHECKSUM_PARTIAL;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
- skb_shinfo(skb)->gso_segs = 0;
+ tcp_skb_pcount_set(skb, 0);
if (!copied)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
copied += copy;
offset += copy;
- if (!(size -= copy))
+ if (!(size -= copy)) {
+ tcp_tx_timestamp(sk, skb);
goto out;
+ }
if (skb->len < size_goal || (flags & MSG_OOB))
continue;
@@ -1175,13 +1187,6 @@ new_segment:
goto wait_for_memory;
/*
- * All packets are restored as if they have
- * already been sent.
- */
- if (tp->repair)
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
- /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -1190,6 +1195,13 @@ new_segment:
skb_entail(sk, skb);
copy = size_goal;
max = size_goal;
+
+ /* All packets are restored as if they have
+ * already been sent. skb_mstamp isn't set to
+ * avoid wrong rtt estimation.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
}
/* Try to append data to the end of skb. */
@@ -1248,12 +1260,14 @@ new_segment:
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
- skb_shinfo(skb)->gso_segs = 0;
+ tcp_skb_pcount_set(skb, 0);
from += copy;
copied += copy;
- if ((seglen -= copy) == 0 && iovlen == 0)
+ if ((seglen -= copy) == 0 && iovlen == 0) {
+ tcp_tx_timestamp(sk, skb);
goto out;
+ }
if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
@@ -1379,7 +1393,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
-void tcp_cleanup_rbuf(struct sock *sk, int copied)
+static void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
@@ -1455,39 +1469,6 @@ static void tcp_prequeue_process(struct sock *sk)
tp->ucopy.memory = 0;
}
-#ifdef CONFIG_NET_DMA
-static void tcp_service_net_dma(struct sock *sk, bool wait)
-{
- dma_cookie_t done, used;
- dma_cookie_t last_issued;
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tp->ucopy.dma_chan)
- return;
-
- last_issued = tp->ucopy.dma_cookie;
- dma_async_issue_pending(tp->ucopy.dma_chan);
-
- do {
- if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
- last_issued, &done,
- &used) == DMA_COMPLETE) {
- /* Safe to free early-copied skbs now */
- __skb_queue_purge(&sk->sk_async_wait_queue);
- break;
- } else {
- struct sk_buff *skb;
- while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
- (dma_async_is_complete(skb->dma_cookie, done,
- used) == DMA_COMPLETE)) {
- __skb_dequeue(&sk->sk_async_wait_queue);
- kfree_skb(skb);
- }
- }
- } while (wait);
-}
-#endif
-
static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
@@ -1495,9 +1476,9 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
offset = seq - TCP_SKB_CB(skb)->seq;
- if (tcp_hdr(skb)->syn)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
- if (offset < skb->len || tcp_hdr(skb)->fin) {
+ if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
*off = offset;
return skb;
}
@@ -1505,7 +1486,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
* splitted a fat GRO packet, while we released socket lock
* in skb_splice_bits()
*/
- sk_eat_skb(sk, skb, false);
+ sk_eat_skb(sk, skb);
}
return NULL;
}
@@ -1570,12 +1551,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (offset + 1 != skb->len)
continue;
}
- if (tcp_hdr(skb)->fin) {
- sk_eat_skb(sk, skb, false);
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ sk_eat_skb(sk, skb);
++seq;
break;
}
- sk_eat_skb(sk, skb, false);
+ sk_eat_skb(sk, skb);
if (!desc->count)
break;
tp->copied_seq = seq;
@@ -1613,10 +1594,12 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
int target; /* Read at least this many bytes */
long timeo;
struct task_struct *user_recv = NULL;
- bool copied_early = false;
struct sk_buff *skb;
u32 urg_hole = 0;
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return ip_recv_error(sk, msg, len, addr_len);
+
if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED))
sk_busy_loop(sk, nonblock);
@@ -1656,28 +1639,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
-#ifdef CONFIG_NET_DMA
- tp->ucopy.dma_chan = NULL;
- preempt_disable();
- skb = skb_peek_tail(&sk->sk_receive_queue);
- {
- int available = 0;
-
- if (skb)
- available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
- if ((available < target) &&
- (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
- !sysctl_tcp_low_latency &&
- net_dma_find_channel()) {
- preempt_enable();
- tp->ucopy.pinned_list =
- dma_pin_iovec_pages(msg->msg_iov, len);
- } else {
- preempt_enable();
- }
- }
-#endif
-
do {
u32 offset;
@@ -1704,11 +1665,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
break;
offset = *seq - TCP_SKB_CB(skb)->seq;
- if (tcp_hdr(skb)->syn)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
offset--;
if (offset < skb->len)
goto found_ok_skb;
- if (tcp_hdr(skb)->fin)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
WARN(!(flags & MSG_PEEK),
"recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
@@ -1808,16 +1769,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* __ Set realtime policy in scheduler __ */
}
-#ifdef CONFIG_NET_DMA
- if (tp->ucopy.dma_chan) {
- if (tp->rcv_wnd == 0 &&
- !skb_queue_empty(&sk->sk_async_wait_queue)) {
- tcp_service_net_dma(sk, true);
- tcp_cleanup_rbuf(sk, copied);
- } else
- dma_async_issue_pending(tp->ucopy.dma_chan);
- }
-#endif
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
@@ -1825,11 +1776,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
} else
sk_wait_data(sk, &timeo);
-#ifdef CONFIG_NET_DMA
- tcp_service_net_dma(sk, false); /* Don't block */
- tp->ucopy.wakeup = 0;
-#endif
-
if (user_recv) {
int chunk;
@@ -1887,43 +1833,13 @@ do_prequeue:
}
if (!(flags & MSG_TRUNC)) {
-#ifdef CONFIG_NET_DMA
- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = net_dma_find_channel();
-
- if (tp->ucopy.dma_chan) {
- tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
- tp->ucopy.dma_chan, skb, offset,
- msg->msg_iov, used,
- tp->ucopy.pinned_list);
-
- if (tp->ucopy.dma_cookie < 0) {
-
- pr_alert("%s: dma_cookie < 0\n",
- __func__);
-
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
- break;
- }
-
- dma_async_issue_pending(tp->ucopy.dma_chan);
-
- if ((offset + used) == skb->len)
- copied_early = true;
-
- } else
-#endif
- {
- err = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, used);
- if (err) {
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
- break;
- }
+ err = skb_copy_datagram_iovec(skb, offset,
+ msg->msg_iov, used);
+ if (err) {
+ /* Exception. Bailout! */
+ if (!copied)
+ copied = -EFAULT;
+ break;
}
}
@@ -1941,21 +1857,17 @@ skip_copy:
if (used + offset < skb->len)
continue;
- if (tcp_hdr(skb)->fin)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
- if (!(flags & MSG_PEEK)) {
- sk_eat_skb(sk, skb, copied_early);
- copied_early = false;
- }
+ if (!(flags & MSG_PEEK))
+ sk_eat_skb(sk, skb);
continue;
found_fin_ok:
/* Process the FIN. */
++*seq;
- if (!(flags & MSG_PEEK)) {
- sk_eat_skb(sk, skb, copied_early);
- copied_early = false;
- }
+ if (!(flags & MSG_PEEK))
+ sk_eat_skb(sk, skb);
break;
} while (len > 0);
@@ -1978,16 +1890,6 @@ skip_copy:
tp->ucopy.len = 0;
}
-#ifdef CONFIG_NET_DMA
- tcp_service_net_dma(sk, true); /* Wait for queue to drain */
- tp->ucopy.dma_chan = NULL;
-
- if (tp->ucopy.pinned_list) {
- dma_unpin_iovec_pages(tp->ucopy.pinned_list);
- tp->ucopy.pinned_list = NULL;
- }
-#endif
-
/* According to UNIX98, msg_name/msg_namelen are ignored
* on connected socket. I was just happy when found this 8) --ANK
*/
@@ -2142,8 +2044,10 @@ void tcp_close(struct sock *sk, long timeout)
* reader process may not have drained the data yet!
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
- tcp_hdr(skb)->fin;
+ u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
+
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ len--;
data_was_unread += len;
__kfree_skb(skb);
}
@@ -2331,9 +2235,6 @@ int tcp_disconnect(struct sock *sk, int flags)
__skb_queue_purge(&sk->sk_receive_queue);
tcp_write_queue_purge(sk);
__skb_queue_purge(&tp->out_of_order_queue);
-#ifdef CONFIG_NET_DMA
- __skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
inet->inet_dport = 0;
@@ -2673,7 +2574,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
#endif
case TCP_USER_TIMEOUT:
- /* Cap the max timeout in ms TCP will retry/retrans
+ /* Cap the max time in ms TCP will retry or probe the window
* before giving up and aborting (ETIMEDOUT) a connection.
*/
if (val < 0)
@@ -3152,7 +3053,7 @@ static int __init set_thash_entries(char *str)
}
__setup("thash_entries=", set_thash_entries);
-static void tcp_init_mem(void)
+static void __init tcp_init_mem(void)
{
unsigned long limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
@@ -3170,8 +3071,8 @@ void __init tcp_init(void)
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
- percpu_counter_init(&tcp_sockets_allocated, 0);
- percpu_counter_init(&tcp_orphan_count, 0);
+ percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
+ percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
@@ -3238,8 +3139,6 @@ void __init tcp_init(void)
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
tcp_metrics_init();
-
- tcp_register_congestion_control(&tcp_reno);
-
+ BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
tcp_tasklet_init();
}
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index d5de69bc04f5..bb395d46a389 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -17,7 +17,6 @@
#include <linux/module.h>
#include <net/tcp.h>
-
#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
* max_cwnd = snd_cwnd * beta
*/
@@ -46,11 +45,10 @@ MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
module_param(smooth_part, int, 0644);
MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wmax-B to Wmax");
-
/* BIC TCP Parameters */
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
- u32 last_max_cwnd; /* last maximum snd_cwnd */
+ u32 last_max_cwnd; /* last maximum snd_cwnd */
u32 loss_cwnd; /* congestion window at last loss */
u32 last_cwnd; /* the last snd_cwnd */
u32 last_time; /* time when updated last_cwnd */
@@ -103,7 +101,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
/* binary increase */
if (cwnd < ca->last_max_cwnd) {
- __u32 dist = (ca->last_max_cwnd - cwnd)
+ __u32 dist = (ca->last_max_cwnd - cwnd)
/ BICTCP_B;
if (dist > max_increment)
@@ -154,7 +152,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
bictcp_update(ca, tp->snd_cwnd);
tcp_cong_avoid_ai(tp, ca->cnt);
}
-
}
/*
@@ -177,7 +174,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
ca->loss_cwnd = tp->snd_cwnd;
-
if (tp->snd_cwnd <= low_window)
return max(tp->snd_cwnd >> 1U, 2U);
else
@@ -188,6 +184,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct bictcp *ca = inet_csk_ca(sk);
+
return max(tp->snd_cwnd, ca->loss_cwnd);
}
@@ -206,12 +203,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
if (icsk->icsk_ca_state == TCP_CA_Open) {
struct bictcp *ca = inet_csk_ca(sk);
+
cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
ca->delayed_ack += cnt;
}
}
-
static struct tcp_congestion_ops bictcp __read_mostly = {
.init = bictcp_init,
.ssthresh = bictcp_recalc_ssthresh,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 7b09d8b49fa5..b1c5970d47a1 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -74,24 +74,34 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
/* Assign choice of congestion control. */
-void tcp_init_congestion_control(struct sock *sk)
+void tcp_assign_congestion_control(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_congestion_ops *ca;
- /* if no choice made yet assign the current value set as default */
- if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
- rcu_read_lock();
- list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (try_module_get(ca->owner)) {
- icsk->icsk_ca_ops = ca;
- break;
- }
-
- /* fallback to next available */
+ rcu_read_lock();
+ list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+ if (likely(try_module_get(ca->owner))) {
+ icsk->icsk_ca_ops = ca;
+ goto out;
}
- rcu_read_unlock();
+ /* Fallback to next available. The last really
+ * guaranteed fallback is Reno from this list.
+ */
}
+out:
+ rcu_read_unlock();
+
+ /* Clear out private data before diag gets it and
+ * the ca has not been initialized.
+ */
+ if (ca->get_info)
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+}
+
+void tcp_init_congestion_control(struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
@@ -142,7 +152,6 @@ static int __init tcp_congestion_default(void)
}
late_initcall(tcp_congestion_default);
-
/* Build string with list of available congestion control values */
void tcp_get_available_congestion_control(char *buf, size_t maxlen)
{
@@ -154,7 +163,6 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
offs == 0 ? "" : " ", ca->name);
-
}
rcu_read_unlock();
}
@@ -186,7 +194,6 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
offs == 0 ? "" : " ", ca->name);
-
}
rcu_read_unlock();
}
@@ -230,7 +237,6 @@ out:
return ret;
}
-
/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
@@ -285,15 +291,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
* ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
* returns the leftover acks to adjust cwnd in congestion avoidance mode.
*/
-int tcp_slow_start(struct tcp_sock *tp, u32 acked)
+void tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
u32 cwnd = tp->snd_cwnd + acked;
if (cwnd > tp->snd_ssthresh)
cwnd = tp->snd_ssthresh + 1;
- acked -= cwnd - tp->snd_cwnd;
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
- return acked;
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
@@ -337,6 +341,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+
return max(tp->snd_cwnd >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
@@ -348,15 +353,3 @@ struct tcp_congestion_ops tcp_reno = {
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
};
-
-/* Initial congestion control used (until SYN)
- * really reno under another name so we can tell difference
- * during tcp_set_default_congestion_control
- */
-struct tcp_congestion_ops tcp_init_congestion_ops = {
- .name = "",
- .owner = THIS_MODULE,
- .ssthresh = tcp_reno_ssthresh,
- .cong_avoid = tcp_reno_cong_avoid,
-};
-EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index a9bd8a4828a9..20de0118c98e 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -82,12 +82,13 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse
/* BIC TCP Parameters */
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
- u32 last_max_cwnd; /* last maximum snd_cwnd */
+ u32 last_max_cwnd; /* last maximum snd_cwnd */
u32 loss_cwnd; /* congestion window at last loss */
u32 last_cwnd; /* the last snd_cwnd */
u32 last_time; /* time when updated last_cwnd */
u32 bic_origin_point;/* origin point of bic function */
- u32 bic_K; /* time to origin point from the beginning of the current epoch */
+ u32 bic_K; /* time to origin point
+ from the beginning of the current epoch */
u32 delay_min; /* min delay (msec << 3) */
u32 epoch_start; /* beginning of an epoch */
u32 ack_cnt; /* number of acks */
@@ -219,7 +220,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
ca->last_time = tcp_time_stamp;
if (ca->epoch_start == 0) {
- ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */
+ ca->epoch_start = tcp_time_stamp; /* record beginning */
ca->ack_cnt = 1; /* start counting */
ca->tcp_cwnd = cwnd; /* syn with cubic */
@@ -263,9 +264,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
/* c/rtt * (t-K)^3 */
delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
- if (t < ca->bic_K) /* below origin*/
+ if (t < ca->bic_K) /* below origin*/
bic_target = ca->bic_origin_point - delta;
- else /* above origin*/
+ else /* above origin*/
bic_target = ca->bic_origin_point + delta;
/* cubic function - calc bictcp_cnt*/
@@ -285,13 +286,14 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
/* TCP Friendly */
if (tcp_friendliness) {
u32 scale = beta_scale;
+
delta = (cwnd * scale) >> 3;
while (ca->ack_cnt > delta) { /* update tcp cwnd */
ca->ack_cnt -= delta;
ca->tcp_cwnd++;
}
- if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */
+ if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */
delta = ca->tcp_cwnd - cwnd;
max_cnt = cwnd / delta;
if (ca->cnt > max_cnt)
@@ -320,7 +322,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
bictcp_update(ca, tp->snd_cwnd);
tcp_cong_avoid_ai(tp, ca->cnt);
}
-
}
static u32 bictcp_recalc_ssthresh(struct sock *sk)
@@ -452,7 +453,8 @@ static int __init cubictcp_register(void)
* based on SRTT of 100ms
*/
- beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
+ beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
+ / (BICTCP_BETA_SCALE - beta);
cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
new file mode 100644
index 000000000000..b504371af742
--- /dev/null
+++ b/net/ipv4/tcp_dctcp.c
@@ -0,0 +1,344 @@
+/* DataCenter TCP (DCTCP) congestion control.
+ *
+ * http://simula.stanford.edu/~alizade/Site/DCTCP.html
+ *
+ * This is an implementation of DCTCP over Reno, an enhancement to the
+ * TCP congestion control algorithm designed for data centers. DCTCP
+ * leverages Explicit Congestion Notification (ECN) in the network to
+ * provide multi-bit feedback to the end hosts. DCTCP's goal is to meet
+ * the following three data center transport requirements:
+ *
+ * - High burst tolerance (incast due to partition/aggregate)
+ * - Low latency (short flows, queries)
+ * - High throughput (continuous data updates, large file transfers)
+ * with commodity shallow buffered switches
+ *
+ * The algorithm is described in detail in the following two papers:
+ *
+ * 1) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye,
+ * Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan:
+ * "Data Center TCP (DCTCP)", Data Center Networks session
+ * Proc. ACM SIGCOMM, New Delhi, 2010.
+ * http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+ *
+ * 2) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar:
+ * "Analysis of DCTCP: Stability, Convergence, and Fairness"
+ * Proc. ACM SIGMETRICS, San Jose, 2011.
+ * http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
+ *
+ * Initial prototype from Abdul Kabbani, Masato Yasuda and Mohammad Alizadeh.
+ *
+ * Authors:
+ *
+ * Daniel Borkmann <dborkman@redhat.com>
+ * Florian Westphal <fw@strlen.de>
+ * Glenn Judd <glenn.judd@morganstanley.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <net/tcp.h>
+#include <linux/inet_diag.h>
+
+#define DCTCP_MAX_ALPHA 1024U
+
+struct dctcp {
+ u32 acked_bytes_ecn;
+ u32 acked_bytes_total;
+ u32 prior_snd_una;
+ u32 prior_rcv_nxt;
+ u32 dctcp_alpha;
+ u32 next_seq;
+ u32 ce_state;
+ u32 delayed_ack_reserved;
+};
+
+static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
+module_param(dctcp_shift_g, uint, 0644);
+MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha");
+
+static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
+module_param(dctcp_alpha_on_init, uint, 0644);
+MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
+
+static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
+module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
+MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
+ "parameter for clamping alpha on loss");
+
+static struct tcp_congestion_ops dctcp_reno;
+
+static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
+{
+ ca->next_seq = tp->snd_nxt;
+
+ ca->acked_bytes_ecn = 0;
+ ca->acked_bytes_total = 0;
+}
+
+static void dctcp_init(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ if ((tp->ecn_flags & TCP_ECN_OK) ||
+ (sk->sk_state == TCP_LISTEN ||
+ sk->sk_state == TCP_CLOSE)) {
+ struct dctcp *ca = inet_csk_ca(sk);
+
+ ca->prior_snd_una = tp->snd_una;
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+
+ ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
+
+ ca->delayed_ack_reserved = 0;
+ ca->ce_state = 0;
+
+ dctcp_reset(tp, ca);
+ return;
+ }
+
+ /* No ECN support? Fall back to Reno. Also need to clear
+ * ECT from sk since it is set during 3WHS for DCTCP.
+ */
+ inet_csk(sk)->icsk_ca_ops = &dctcp_reno;
+ INET_ECN_dontxmit(sk);
+}
+
+static u32 dctcp_ssthresh(struct sock *sk)
+{
+ const struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+}
+
+/* Minimal DCTP CE state machine:
+ *
+ * S: 0 <- last pkt was non-CE
+ * 1 <- last pkt was CE
+ */
+
+static void dctcp_ce_state_0_to_1(struct sock *sk)
+{
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* State has changed from CE=0 to CE=1 and delayed
+ * ACK has not sent yet.
+ */
+ if (!ca->ce_state && ca->delayed_ack_reserved) {
+ u32 tmp_rcv_nxt;
+
+ /* Save current rcv_nxt. */
+ tmp_rcv_nxt = tp->rcv_nxt;
+
+ /* Generate previous ack with CE=0. */
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+ tp->rcv_nxt = ca->prior_rcv_nxt;
+
+ tcp_send_ack(sk);
+
+ /* Recover current rcv_nxt. */
+ tp->rcv_nxt = tmp_rcv_nxt;
+ }
+
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+ ca->ce_state = 1;
+
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+}
+
+static void dctcp_ce_state_1_to_0(struct sock *sk)
+{
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* State has changed from CE=1 to CE=0 and delayed
+ * ACK has not sent yet.
+ */
+ if (ca->ce_state && ca->delayed_ack_reserved) {
+ u32 tmp_rcv_nxt;
+
+ /* Save current rcv_nxt. */
+ tmp_rcv_nxt = tp->rcv_nxt;
+
+ /* Generate previous ack with CE=1. */
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ tp->rcv_nxt = ca->prior_rcv_nxt;
+
+ tcp_send_ack(sk);
+
+ /* Recover current rcv_nxt. */
+ tp->rcv_nxt = tmp_rcv_nxt;
+ }
+
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+ ca->ce_state = 0;
+
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+static void dctcp_update_alpha(struct sock *sk, u32 flags)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct dctcp *ca = inet_csk_ca(sk);
+ u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
+
+ /* If ack did not advance snd_una, count dupack as MSS size.
+ * If ack did update window, do not count it at all.
+ */
+ if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
+ acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
+ if (acked_bytes) {
+ ca->acked_bytes_total += acked_bytes;
+ ca->prior_snd_una = tp->snd_una;
+
+ if (flags & CA_ACK_ECE)
+ ca->acked_bytes_ecn += acked_bytes;
+ }
+
+ /* Expired RTT */
+ if (!before(tp->snd_una, ca->next_seq)) {
+ /* For avoiding denominator == 1. */
+ if (ca->acked_bytes_total == 0)
+ ca->acked_bytes_total = 1;
+
+ /* alpha = (1 - g) * alpha + g * F */
+ ca->dctcp_alpha = ca->dctcp_alpha -
+ (ca->dctcp_alpha >> dctcp_shift_g) +
+ (ca->acked_bytes_ecn << (10U - dctcp_shift_g)) /
+ ca->acked_bytes_total;
+
+ if (ca->dctcp_alpha > DCTCP_MAX_ALPHA)
+ /* Clamp dctcp_alpha to max. */
+ ca->dctcp_alpha = DCTCP_MAX_ALPHA;
+
+ dctcp_reset(tp, ca);
+ }
+}
+
+static void dctcp_state(struct sock *sk, u8 new_state)
+{
+ if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
+ struct dctcp *ca = inet_csk_ca(sk);
+
+ /* If this extension is enabled, we clamp dctcp_alpha to
+ * max on packet loss; the motivation is that dctcp_alpha
+ * is an indicator to the extend of congestion and packet
+ * loss is an indicator of extreme congestion; setting
+ * this in practice turned out to be beneficial, and
+ * effectively assumes total congestion which reduces the
+ * window by half.
+ */
+ ca->dctcp_alpha = DCTCP_MAX_ALPHA;
+ }
+}
+
+static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
+{
+ struct dctcp *ca = inet_csk_ca(sk);
+
+ switch (ev) {
+ case CA_EVENT_DELAYED_ACK:
+ if (!ca->delayed_ack_reserved)
+ ca->delayed_ack_reserved = 1;
+ break;
+ case CA_EVENT_NON_DELAYED_ACK:
+ if (ca->delayed_ack_reserved)
+ ca->delayed_ack_reserved = 0;
+ break;
+ default:
+ /* Don't care for the rest. */
+ break;
+ }
+}
+
+static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+{
+ switch (ev) {
+ case CA_EVENT_ECN_IS_CE:
+ dctcp_ce_state_0_to_1(sk);
+ break;
+ case CA_EVENT_ECN_NO_CE:
+ dctcp_ce_state_1_to_0(sk);
+ break;
+ case CA_EVENT_DELAYED_ACK:
+ case CA_EVENT_NON_DELAYED_ACK:
+ dctcp_update_ack_reserved(sk, ev);
+ break;
+ default:
+ /* Don't care for the rest. */
+ break;
+ }
+}
+
+static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+{
+ const struct dctcp *ca = inet_csk_ca(sk);
+
+ /* Fill it also in case of VEGASINFO due to req struct limits.
+ * We can still correctly retrieve it later.
+ */
+ if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) ||
+ ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+ struct tcp_dctcp_info info;
+
+ memset(&info, 0, sizeof(info));
+ if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) {
+ info.dctcp_enabled = 1;
+ info.dctcp_ce_state = (u16) ca->ce_state;
+ info.dctcp_alpha = ca->dctcp_alpha;
+ info.dctcp_ab_ecn = ca->acked_bytes_ecn;
+ info.dctcp_ab_tot = ca->acked_bytes_total;
+ }
+
+ nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+ }
+}
+
+static struct tcp_congestion_ops dctcp __read_mostly = {
+ .init = dctcp_init,
+ .in_ack_event = dctcp_update_alpha,
+ .cwnd_event = dctcp_cwnd_event,
+ .ssthresh = dctcp_ssthresh,
+ .cong_avoid = tcp_reno_cong_avoid,
+ .set_state = dctcp_state,
+ .get_info = dctcp_get_info,
+ .flags = TCP_CONG_NEEDS_ECN,
+ .owner = THIS_MODULE,
+ .name = "dctcp",
+};
+
+static struct tcp_congestion_ops dctcp_reno __read_mostly = {
+ .ssthresh = tcp_reno_ssthresh,
+ .cong_avoid = tcp_reno_cong_avoid,
+ .get_info = dctcp_get_info,
+ .owner = THIS_MODULE,
+ .name = "dctcp-reno",
+};
+
+static int __init dctcp_register(void)
+{
+ BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
+ return tcp_register_congestion_control(&dctcp);
+}
+
+static void __exit dctcp_unregister(void)
+{
+ tcp_unregister_congestion_control(&dctcp);
+}
+
+module_init(dctcp_register);
+module_exit(dctcp_unregister);
+
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_AUTHOR("Glenn Judd <glenn.judd@morganstanley.com>");
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DataCenter TCP (DCTCP)");
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index ed3f2ad42e0f..0d73f9ddb55b 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -9,7 +9,6 @@
* 2 of the License, or (at your option) any later version.
*/
-
#include <linux/module.h>
#include <linux/inet_diag.h>
@@ -35,13 +34,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
}
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
- struct inet_diag_req_v2 *r, struct nlattr *bc)
+ struct inet_diag_req_v2 *r, struct nlattr *bc)
{
inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
}
static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
- struct inet_diag_req_v2 *req)
+ struct inet_diag_req_v2 *req)
{
return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 9771563ab564..815c85e3b1e0 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -115,7 +115,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
struct in6_addr *buf = (struct in6_addr *) tmp.val;
- int i = 4;
+ int i;
for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 1c4908280d92..882c08aae2f5 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <net/tcp.h>
-
/* From AIMD tables from RFC 3649 appendix B,
* with fixed-point MD scaled <<8.
*/
@@ -17,78 +16,78 @@ static const struct hstcp_aimd_val {
unsigned int cwnd;
unsigned int md;
} hstcp_aimd_vals[] = {
- { 38, 128, /* 0.50 */ },
- { 118, 112, /* 0.44 */ },
- { 221, 104, /* 0.41 */ },
- { 347, 98, /* 0.38 */ },
- { 495, 93, /* 0.37 */ },
- { 663, 89, /* 0.35 */ },
- { 851, 86, /* 0.34 */ },
- { 1058, 83, /* 0.33 */ },
- { 1284, 81, /* 0.32 */ },
- { 1529, 78, /* 0.31 */ },
- { 1793, 76, /* 0.30 */ },
- { 2076, 74, /* 0.29 */ },
- { 2378, 72, /* 0.28 */ },
- { 2699, 71, /* 0.28 */ },
- { 3039, 69, /* 0.27 */ },
- { 3399, 68, /* 0.27 */ },
- { 3778, 66, /* 0.26 */ },
- { 4177, 65, /* 0.26 */ },
- { 4596, 64, /* 0.25 */ },
- { 5036, 62, /* 0.25 */ },
- { 5497, 61, /* 0.24 */ },
- { 5979, 60, /* 0.24 */ },
- { 6483, 59, /* 0.23 */ },
- { 7009, 58, /* 0.23 */ },
- { 7558, 57, /* 0.22 */ },
- { 8130, 56, /* 0.22 */ },
- { 8726, 55, /* 0.22 */ },
- { 9346, 54, /* 0.21 */ },
- { 9991, 53, /* 0.21 */ },
- { 10661, 52, /* 0.21 */ },
- { 11358, 52, /* 0.20 */ },
- { 12082, 51, /* 0.20 */ },
- { 12834, 50, /* 0.20 */ },
- { 13614, 49, /* 0.19 */ },
- { 14424, 48, /* 0.19 */ },
- { 15265, 48, /* 0.19 */ },
- { 16137, 47, /* 0.19 */ },
- { 17042, 46, /* 0.18 */ },
- { 17981, 45, /* 0.18 */ },
- { 18955, 45, /* 0.18 */ },
- { 19965, 44, /* 0.17 */ },
- { 21013, 43, /* 0.17 */ },
- { 22101, 43, /* 0.17 */ },
- { 23230, 42, /* 0.17 */ },
- { 24402, 41, /* 0.16 */ },
- { 25618, 41, /* 0.16 */ },
- { 26881, 40, /* 0.16 */ },
- { 28193, 39, /* 0.16 */ },
- { 29557, 39, /* 0.15 */ },
- { 30975, 38, /* 0.15 */ },
- { 32450, 38, /* 0.15 */ },
- { 33986, 37, /* 0.15 */ },
- { 35586, 36, /* 0.14 */ },
- { 37253, 36, /* 0.14 */ },
- { 38992, 35, /* 0.14 */ },
- { 40808, 35, /* 0.14 */ },
- { 42707, 34, /* 0.13 */ },
- { 44694, 33, /* 0.13 */ },
- { 46776, 33, /* 0.13 */ },
- { 48961, 32, /* 0.13 */ },
- { 51258, 32, /* 0.13 */ },
- { 53677, 31, /* 0.12 */ },
- { 56230, 30, /* 0.12 */ },
- { 58932, 30, /* 0.12 */ },
- { 61799, 29, /* 0.12 */ },
- { 64851, 28, /* 0.11 */ },
- { 68113, 28, /* 0.11 */ },
- { 71617, 27, /* 0.11 */ },
- { 75401, 26, /* 0.10 */ },
- { 79517, 26, /* 0.10 */ },
- { 84035, 25, /* 0.10 */ },
- { 89053, 24, /* 0.10 */ },
+ { 38, 128, /* 0.50 */ },
+ { 118, 112, /* 0.44 */ },
+ { 221, 104, /* 0.41 */ },
+ { 347, 98, /* 0.38 */ },
+ { 495, 93, /* 0.37 */ },
+ { 663, 89, /* 0.35 */ },
+ { 851, 86, /* 0.34 */ },
+ { 1058, 83, /* 0.33 */ },
+ { 1284, 81, /* 0.32 */ },
+ { 1529, 78, /* 0.31 */ },
+ { 1793, 76, /* 0.30 */ },
+ { 2076, 74, /* 0.29 */ },
+ { 2378, 72, /* 0.28 */ },
+ { 2699, 71, /* 0.28 */ },
+ { 3039, 69, /* 0.27 */ },
+ { 3399, 68, /* 0.27 */ },
+ { 3778, 66, /* 0.26 */ },
+ { 4177, 65, /* 0.26 */ },
+ { 4596, 64, /* 0.25 */ },
+ { 5036, 62, /* 0.25 */ },
+ { 5497, 61, /* 0.24 */ },
+ { 5979, 60, /* 0.24 */ },
+ { 6483, 59, /* 0.23 */ },
+ { 7009, 58, /* 0.23 */ },
+ { 7558, 57, /* 0.22 */ },
+ { 8130, 56, /* 0.22 */ },
+ { 8726, 55, /* 0.22 */ },
+ { 9346, 54, /* 0.21 */ },
+ { 9991, 53, /* 0.21 */ },
+ { 10661, 52, /* 0.21 */ },
+ { 11358, 52, /* 0.20 */ },
+ { 12082, 51, /* 0.20 */ },
+ { 12834, 50, /* 0.20 */ },
+ { 13614, 49, /* 0.19 */ },
+ { 14424, 48, /* 0.19 */ },
+ { 15265, 48, /* 0.19 */ },
+ { 16137, 47, /* 0.19 */ },
+ { 17042, 46, /* 0.18 */ },
+ { 17981, 45, /* 0.18 */ },
+ { 18955, 45, /* 0.18 */ },
+ { 19965, 44, /* 0.17 */ },
+ { 21013, 43, /* 0.17 */ },
+ { 22101, 43, /* 0.17 */ },
+ { 23230, 42, /* 0.17 */ },
+ { 24402, 41, /* 0.16 */ },
+ { 25618, 41, /* 0.16 */ },
+ { 26881, 40, /* 0.16 */ },
+ { 28193, 39, /* 0.16 */ },
+ { 29557, 39, /* 0.15 */ },
+ { 30975, 38, /* 0.15 */ },
+ { 32450, 38, /* 0.15 */ },
+ { 33986, 37, /* 0.15 */ },
+ { 35586, 36, /* 0.14 */ },
+ { 37253, 36, /* 0.14 */ },
+ { 38992, 35, /* 0.14 */ },
+ { 40808, 35, /* 0.14 */ },
+ { 42707, 34, /* 0.13 */ },
+ { 44694, 33, /* 0.13 */ },
+ { 46776, 33, /* 0.13 */ },
+ { 48961, 32, /* 0.13 */ },
+ { 51258, 32, /* 0.13 */ },
+ { 53677, 31, /* 0.12 */ },
+ { 56230, 30, /* 0.12 */ },
+ { 58932, 30, /* 0.12 */ },
+ { 61799, 29, /* 0.12 */ },
+ { 64851, 28, /* 0.11 */ },
+ { 68113, 28, /* 0.11 */ },
+ { 71617, 27, /* 0.11 */ },
+ { 75401, 26, /* 0.10 */ },
+ { 79517, 26, /* 0.10 */ },
+ { 84035, 25, /* 0.10 */ },
+ { 89053, 24, /* 0.10 */ },
};
#define HSTCP_AIMD_MAX ARRAY_SIZE(hstcp_aimd_vals)
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 031361311a8b..58469fff6c18 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,8 @@ static inline void measure_rtt(struct sock *sk, u32 srtt)
}
}
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt)
+static void measure_achieved_throughput(struct sock *sk,
+ u32 pkts_acked, s32 rtt)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
@@ -148,8 +149,8 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
if (use_bandwidth_switch) {
u32 maxB = ca->maxB;
u32 old_maxB = ca->old_maxB;
- ca->old_maxB = ca->maxB;
+ ca->old_maxB = ca->maxB;
if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
ca->beta = BETA_MIN;
ca->modeswitch = 0;
@@ -270,6 +271,7 @@ static void htcp_state(struct sock *sk, u8 new_state)
case TCP_CA_Open:
{
struct htcp *ca = inet_csk_ca(sk);
+
if (ca->undo_last_cong) {
ca->last_cong = jiffies;
ca->undo_last_cong = 0;
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index d8f8f05a4951..f963b274f2b0 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -29,7 +29,6 @@ static int rtt0 = 25;
module_param(rtt0, int, 0644);
MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)");
-
/* This is called to refresh values for hybla parameters */
static inline void hybla_recalc_param (struct sock *sk)
{
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 5999b3972e64..1d5a30a90adf 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -284,7 +284,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
if (delta >= tp->snd_cwnd) {
tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
- (u32) tp->snd_cwnd_clamp);
+ (u32)tp->snd_cwnd_clamp);
tp->snd_cwnd_cnt = 0;
}
}
@@ -299,7 +299,6 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
}
-
/* Extract info for Tcp socket info provided via netlink. */
static void tcp_illinois_info(struct sock *sk, u32 ext,
struct sk_buff *skb)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 40639c288dc2..00a41499d52c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -73,7 +73,7 @@
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
-#include <net/netdma.h>
+#include <linux/errqueue.h>
int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
@@ -200,28 +200,25 @@ static inline bool tcp_in_quickack_mode(const struct sock *sk)
return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
}
-static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
+static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
if (tp->ecn_flags & TCP_ECN_OK)
tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}
-static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
+static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
{
if (tcp_hdr(skb)->cwr)
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}
-static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
+static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}
-static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
- if (!(tp->ecn_flags & TCP_ECN_OK))
- return;
-
switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
case INET_ECN_NOT_ECT:
/* Funny extension: if ECT is not set on a segment,
@@ -232,30 +229,43 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s
tcp_enter_quickack_mode((struct sock *)tp);
break;
case INET_ECN_CE:
+ if (tcp_ca_needs_ecn((struct sock *)tp))
+ tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
+
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
/* Better not delay acks, sender can have a very low cwnd */
tcp_enter_quickack_mode((struct sock *)tp);
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
- /* fallinto */
+ tp->ecn_flags |= TCP_ECN_SEEN;
+ break;
default:
+ if (tcp_ca_needs_ecn((struct sock *)tp))
+ tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
tp->ecn_flags |= TCP_ECN_SEEN;
+ break;
}
}
-static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
+static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+{
+ if (tp->ecn_flags & TCP_ECN_OK)
+ __tcp_ecn_check_ce(tp, skb);
+}
+
+static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
tp->ecn_flags &= ~TCP_ECN_OK;
}
-static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
+static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
tp->ecn_flags &= ~TCP_ECN_OK;
}
-static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
+static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
return true;
@@ -652,7 +662,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
}
icsk->icsk_ack.lrcvtime = now;
- TCP_ECN_check_ce(tp, skb);
+ tcp_ecn_check_ce(tp, skb);
if (skb->len >= 128)
tcp_grow_window(sk, skb);
@@ -1294,9 +1304,9 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(prev)->end_seq += shifted;
TCP_SKB_CB(skb)->seq += shifted;
- skb_shinfo(prev)->gso_segs += pcount;
- BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
- skb_shinfo(skb)->gso_segs -= pcount;
+ tcp_skb_pcount_add(prev, pcount);
+ BUG_ON(tcp_skb_pcount(skb) < pcount);
+ tcp_skb_pcount_add(skb, -pcount);
/* When we're adding to gso_segs == 1, gso_size will be zero,
* in theory this shouldn't be necessary but as long as DSACK
@@ -1309,7 +1319,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
}
/* CHECKME: To clear or not to clear? Mimics normal skb currently */
- if (skb_shinfo(skb)->gso_segs <= 1) {
+ if (tcp_skb_pcount(skb) <= 1) {
skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0;
}
@@ -1887,33 +1897,34 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
tp->sacked_out = 0;
}
-static void tcp_clear_retrans_partial(struct tcp_sock *tp)
+void tcp_clear_retrans(struct tcp_sock *tp)
{
tp->retrans_out = 0;
tp->lost_out = 0;
-
tp->undo_marker = 0;
tp->undo_retrans = -1;
+ tp->fackets_out = 0;
+ tp->sacked_out = 0;
}
-void tcp_clear_retrans(struct tcp_sock *tp)
+static inline void tcp_init_undo(struct tcp_sock *tp)
{
- tcp_clear_retrans_partial(tp);
-
- tp->fackets_out = 0;
- tp->sacked_out = 0;
+ tp->undo_marker = tp->snd_una;
+ /* Retransmission still in flight may cause DSACKs later. */
+ tp->undo_retrans = tp->retrans_out ? : -1;
}
-/* Enter Loss state. If "how" is not zero, forget all SACK information
+/* Enter Loss state. If we detect SACK reneging, forget all SACK information
* and reset tags completely, otherwise preserve SACKs. If receiver
* dropped its ofo queue, we will know this due to reneging detection.
*/
-void tcp_enter_loss(struct sock *sk, int how)
+void tcp_enter_loss(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
bool new_recovery = false;
+ bool is_reneg; /* is receiver reneging on SACKs? */
/* Reduce ssthresh if it has not yet been made inside this window. */
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
@@ -1923,18 +1934,22 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
+ tcp_init_undo(tp);
}
tp->snd_cwnd = 1;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
- tcp_clear_retrans_partial(tp);
+ tp->retrans_out = 0;
+ tp->lost_out = 0;
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
- tp->undo_marker = tp->snd_una;
- if (how) {
+ skb = tcp_write_queue_head(sk);
+ is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
+ if (is_reneg) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
tp->fackets_out = 0;
}
@@ -1944,11 +1959,8 @@ void tcp_enter_loss(struct sock *sk, int how)
if (skb == tcp_send_head(sk))
break;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
- tp->undo_marker = 0;
-
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
- if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
+ if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
@@ -1966,7 +1978,7 @@ void tcp_enter_loss(struct sock *sk, int how)
sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
- TCP_ECN_queue_cwr(tp);
+ tcp_ecn_queue_cwr(tp);
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
* loss recovery is underway except recurring timeout(s) on
@@ -1981,19 +1993,21 @@ void tcp_enter_loss(struct sock *sk, int how)
* remembered SACKs do not reflect real state of receiver i.e.
* receiver _host_ is heavily congested (or buggy).
*
- * Do processing similar to RTO timeout.
+ * To avoid big spurious retransmission bursts due to transient SACK
+ * scoreboard oddities that look like reneging, we give the receiver a
+ * little time (max(RTT/2, 10ms)) to send us some more ACKs that will
+ * restore sanity to the SACK scoreboard. If the apparent reneging
+ * persists until this RTO then we'll clear the SACK scoreboard.
*/
static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
if (flag & FLAG_SACK_RENEGING) {
- struct inet_connection_sock *icsk = inet_csk(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
+ msecs_to_jiffies(10));
- tcp_enter_loss(sk, 1);
- icsk->icsk_retransmits++;
- tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- icsk->icsk_rto, TCP_RTO_MAX);
+ delay, TCP_RTO_MAX);
return true;
}
return false;
@@ -2356,7 +2370,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
- TCP_ECN_withdraw_cwr(tp);
+ tcp_ecn_withdraw_cwr(tp);
}
} else {
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
@@ -2475,7 +2489,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
* losses and/or application stalls), do not perform any further cwnd
* reductions, but instead slow start up to ssthresh.
*/
-static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+static void tcp_init_cwnd_reduction(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2485,9 +2499,8 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
tp->prior_cwnd = tp->snd_cwnd;
tp->prr_delivered = 0;
tp->prr_out = 0;
- if (set_ssthresh)
- tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
- TCP_ECN_queue_cwr(tp);
+ tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+ tcp_ecn_queue_cwr(tp);
}
static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
@@ -2528,14 +2541,14 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
}
/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
-void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
+void tcp_enter_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
tp->undo_marker = 0;
- tcp_init_cwnd_reduction(sk, set_ssthresh);
+ tcp_init_cwnd_reduction(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
}
}
@@ -2564,7 +2577,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
tp->retrans_stamp = 0;
if (flag & FLAG_ECE)
- tcp_enter_cwr(sk, 1);
+ tcp_enter_cwr(sk);
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
@@ -2664,13 +2677,12 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
NET_INC_STATS_BH(sock_net(sk), mib_idx);
tp->prior_ssthresh = 0;
- tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tp->retrans_out ? : -1;
+ tcp_init_undo(tp);
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tcp_init_cwnd_reduction(sk, true);
+ tcp_init_cwnd_reduction(sk);
}
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
@@ -2680,7 +2692,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
*/
static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
@@ -2706,12 +2717,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
if (recovered) {
/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
- icsk->icsk_retransmits = 0;
tcp_try_undo_recovery(sk);
return;
}
- if (flag & FLAG_DATA_ACKED)
- icsk->icsk_retransmits = 0;
if (tcp_is_reno(tp)) {
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
@@ -2968,7 +2976,8 @@ void tcp_rearm_rto(struct sock *sk)
if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
struct sk_buff *skb = tcp_write_queue_head(sk);
- const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
+ const u32 rto_time_stamp =
+ tcp_skb_timestamp(skb) + rto;
s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
/* delta may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
@@ -3043,10 +3052,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt.v64 = 0;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
u8 sacked = scb->sacked;
u32 acked_pcount;
+ if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+ between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
+ __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+
/* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) {
if (tcp_skb_pcount(skb) == 1 ||
@@ -3203,9 +3217,10 @@ static void tcp_ack_probe(struct sock *sk)
* This function is not for random using!
*/
} else {
+ unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
- TCP_RTO_MAX);
+ when, TCP_RTO_MAX);
}
}
@@ -3346,7 +3361,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
tp->tlp_high_seq = 0;
/* Don't reduce cwnd if DSACK arrives for TLP retrans. */
if (!(flag & FLAG_DSACKING_ACK)) {
- tcp_init_cwnd_reduction(sk, true);
+ tcp_init_cwnd_reduction(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);
tcp_try_keep_open(sk);
@@ -3356,6 +3371,14 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
}
}
+static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ca_ops->in_ack_event)
+ icsk->icsk_ca_ops->in_ack_event(sk, flags);
+}
+
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
@@ -3393,8 +3416,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
- if (after(ack, prior_snd_una))
+ if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
+ icsk->icsk_retransmits = 0;
+ }
prior_fackets = tp->fackets_out;
@@ -3413,10 +3438,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tp->snd_una = ack;
flag |= FLAG_WIN_UPDATE;
- tcp_ca_event(sk, CA_EVENT_FAST_ACK);
+ tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
+ u32 ack_ev_flags = CA_ACK_SLOWPATH;
+
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
@@ -3428,10 +3455,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_rtt_us);
- if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
+ if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
flag |= FLAG_ECE;
+ ack_ev_flags |= CA_ACK_ECE;
+ }
- tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
+ if (flag & FLAG_WIN_UPDATE)
+ ack_ev_flags |= CA_ACK_WIN_UPDATE;
+
+ tcp_in_ack_event(sk, ack_ev_flags);
}
/* We passed data and got it acked, remove any soft error
@@ -4053,6 +4085,44 @@ static void tcp_sack_remove(struct tcp_sock *tp)
tp->rx_opt.num_sacks = num_sacks;
}
+/**
+ * tcp_try_coalesce - try to merge skb to prior one
+ * @sk: socket
+ * @to: prior buffer
+ * @from: buffer to add in queue
+ * @fragstolen: pointer to boolean
+ *
+ * Before queueing skb @from after @to, try to merge them
+ * to reduce overall memory use and queue lengths, if cost is small.
+ * Packets in ofo or receive queues can stay a long time.
+ * Better try to coalesce them right now to avoid future collapses.
+ * Returns true if caller should free @from instead of queueing it
+ */
+static bool tcp_try_coalesce(struct sock *sk,
+ struct sk_buff *to,
+ struct sk_buff *from,
+ bool *fragstolen)
+{
+ int delta;
+
+ *fragstolen = false;
+
+ /* Its possible this segment overlaps with prior segment in queue */
+ if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
+ return false;
+
+ if (!skb_try_coalesce(to, from, fragstolen, &delta))
+ return false;
+
+ atomic_add(delta, &sk->sk_rmem_alloc);
+ sk_mem_charge(sk, delta);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
+ TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
+ TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
+ TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+ return true;
+}
+
/* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue.
*/
@@ -4060,7 +4130,8 @@ static void tcp_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
__u32 dsack_high = tp->rcv_nxt;
- struct sk_buff *skb;
+ struct sk_buff *skb, *tail;
+ bool fragstolen, eaten;
while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
@@ -4073,9 +4144,9 @@ static void tcp_ofo_queue(struct sock *sk)
tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
+ __skb_unlink(skb, &tp->out_of_order_queue);
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
- __skb_unlink(skb, &tp->out_of_order_queue);
__kfree_skb(skb);
continue;
}
@@ -4083,11 +4154,15 @@ static void tcp_ofo_queue(struct sock *sk)
tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq);
- __skb_unlink(skb, &tp->out_of_order_queue);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tail = skb_peek_tail(&sk->sk_receive_queue);
+ eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- if (tcp_hdr(skb)->fin)
+ if (!eaten)
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
+ if (eaten)
+ kfree_skb_partial(skb, fragstolen);
}
}
@@ -4114,53 +4189,13 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
return 0;
}
-/**
- * tcp_try_coalesce - try to merge skb to prior one
- * @sk: socket
- * @to: prior buffer
- * @from: buffer to add in queue
- * @fragstolen: pointer to boolean
- *
- * Before queueing skb @from after @to, try to merge them
- * to reduce overall memory use and queue lengths, if cost is small.
- * Packets in ofo or receive queues can stay a long time.
- * Better try to coalesce them right now to avoid future collapses.
- * Returns true if caller should free @from instead of queueing it
- */
-static bool tcp_try_coalesce(struct sock *sk,
- struct sk_buff *to,
- struct sk_buff *from,
- bool *fragstolen)
-{
- int delta;
-
- *fragstolen = false;
-
- if (tcp_hdr(from)->fin)
- return false;
-
- /* Its possible this segment overlaps with prior segment in queue */
- if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
- return false;
-
- if (!skb_try_coalesce(to, from, fragstolen, &delta))
- return false;
-
- atomic_add(delta, &sk->sk_rmem_alloc);
- sk_mem_charge(sk, delta);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
- TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
- TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
- return true;
-}
-
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb1;
u32 seq, end_seq;
- TCP_ECN_check_ce(tp, skb);
+ tcp_ecn_check_ce(tp, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
@@ -4299,24 +4334,19 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
- struct sk_buff *skb = NULL;
- struct tcphdr *th;
+ struct sk_buff *skb;
bool fragstolen;
if (size == 0)
return 0;
- skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
+ skb = alloc_skb(size, sk->sk_allocation);
if (!skb)
goto err;
- if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
+ if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto err_free;
- th = (struct tcphdr *)skb_put(skb, sizeof(*th));
- skb_reset_transport_header(skb);
- memset(th, 0, sizeof(*th));
-
if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
goto err_free;
@@ -4324,7 +4354,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
- if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
+ if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
WARN_ON_ONCE(fragstolen); /* should not happen */
__kfree_skb(skb);
}
@@ -4338,7 +4368,6 @@ err:
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
- const struct tcphdr *th = tcp_hdr(skb);
struct tcp_sock *tp = tcp_sk(sk);
int eaten = -1;
bool fragstolen = false;
@@ -4347,9 +4376,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
goto drop;
skb_dst_drop(skb);
- __skb_pull(skb, th->doff * 4);
+ __skb_pull(skb, tcp_hdr(skb)->doff * 4);
- TCP_ECN_accept_cwr(tp, skb);
+ tcp_ecn_accept_cwr(tp, skb);
tp->rx_opt.dsack = 0;
@@ -4391,7 +4420,7 @@ queue_and_out:
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (skb->len)
tcp_event_data_recv(sk, skb);
- if (th->fin)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
if (!skb_queue_empty(&tp->out_of_order_queue)) {
@@ -4506,7 +4535,7 @@ restart:
* - bloated or contains data before "start" or
* overlaps to the next one.
*/
- if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
+ if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
(tcp_win_from_space(skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false;
@@ -4525,30 +4554,18 @@ restart:
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
}
- if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
+ if (end_of_skbs ||
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
while (before(start, end)) {
+ int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
struct sk_buff *nskb;
- unsigned int header = skb_headroom(skb);
- int copy = SKB_MAX_ORDER(header, 0);
- /* Too big header? This can happen with IPv6. */
- if (copy < 0)
- return;
- if (end - start < copy)
- copy = end - start;
- nskb = alloc_skb(copy + header, GFP_ATOMIC);
+ nskb = alloc_skb(copy, GFP_ATOMIC);
if (!nskb)
return;
- skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
- skb_set_network_header(nskb, (skb_network_header(skb) -
- skb->head));
- skb_set_transport_header(nskb, (skb_transport_header(skb) -
- skb->head));
- skb_reserve(nskb, header);
- memcpy(nskb->head, skb->head, header);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
__skb_queue_before(list, skb, nskb);
@@ -4572,8 +4589,7 @@ restart:
skb = tcp_collapse_one(sk, skb, list);
if (!skb ||
skb == tail ||
- tcp_hdr(skb)->syn ||
- tcp_hdr(skb)->fin)
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
}
}
@@ -4941,53 +4957,6 @@ static inline bool tcp_checksum_complete_user(struct sock *sk,
__tcp_checksum_complete_user(sk, skb);
}
-#ifdef CONFIG_NET_DMA
-static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
- int hlen)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int chunk = skb->len - hlen;
- int dma_cookie;
- bool copied_early = false;
-
- if (tp->ucopy.wakeup)
- return false;
-
- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = net_dma_find_channel();
-
- if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
-
- dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
- skb, hlen,
- tp->ucopy.iov, chunk,
- tp->ucopy.pinned_list);
-
- if (dma_cookie < 0)
- goto out;
-
- tp->ucopy.dma_cookie = dma_cookie;
- copied_early = true;
-
- tp->ucopy.len -= chunk;
- tp->copied_seq += chunk;
- tcp_rcv_space_adjust(sk);
-
- if ((tp->ucopy.len == 0) ||
- (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
- (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
- tp->ucopy.wakeup = 1;
- sk->sk_data_ready(sk);
- }
- } else if (chunk > 0) {
- tp->ucopy.wakeup = 1;
- sk->sk_data_ready(sk);
- }
-out:
- return copied_early;
-}
-#endif /* CONFIG_NET_DMA */
-
/* Does PAWS and seqno based validation of an incoming segment, flags will
* play significant role here.
*/
@@ -5167,27 +5136,15 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
}
} else {
int eaten = 0;
- int copied_early = 0;
bool fragstolen = false;
- if (tp->copied_seq == tp->rcv_nxt &&
- len - tcp_header_len <= tp->ucopy.len) {
-#ifdef CONFIG_NET_DMA
- if (tp->ucopy.task == current &&
- sock_owned_by_user(sk) &&
- tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
- copied_early = 1;
- eaten = 1;
- }
-#endif
- if (tp->ucopy.task == current &&
- sock_owned_by_user(sk) && !copied_early) {
- __set_current_state(TASK_RUNNING);
+ if (tp->ucopy.task == current &&
+ tp->copied_seq == tp->rcv_nxt &&
+ len - tcp_header_len <= tp->ucopy.len &&
+ sock_owned_by_user(sk)) {
+ __set_current_state(TASK_RUNNING);
- if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
- eaten = 1;
- }
- if (eaten) {
+ if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
@@ -5203,9 +5160,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
+ eaten = 1;
}
- if (copied_early)
- tcp_cleanup_rbuf(sk, skb->len);
}
if (!eaten) {
if (tcp_checksum_complete_user(sk, skb))
@@ -5242,14 +5198,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
goto no_ack;
}
- if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
- __tcp_ack_snd_check(sk, 0);
+ __tcp_ack_snd_check(sk, 0);
no_ack:
-#ifdef CONFIG_NET_DMA
- if (copied_early)
- __skb_queue_tail(&sk->sk_async_wait_queue, skb);
- else
-#endif
if (eaten)
kfree_skb_partial(skb, fragstolen);
sk->sk_data_ready(sk);
@@ -5443,7 +5393,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* state to ESTABLISHED..."
*/
- TCP_ECN_rcv_synack(tp, th);
+ tcp_ecn_rcv_synack(tp, th);
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
tcp_ack(sk, skb, FLAG_SLOWPATH);
@@ -5562,7 +5512,7 @@ discard:
tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tp->max_window = tp->snd_wnd;
- TCP_ECN_rcv_syn(tp, th);
+ tcp_ecn_rcv_syn(tp, th);
tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -5877,3 +5827,190 @@ discard:
return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
+
+static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ if (family == AF_INET)
+ LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
+ &ireq->ir_rmt_addr, port);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (family == AF_INET6)
+ LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"),
+ &ireq->ir_v6_rmt_addr, port);
+#endif
+}
+
+/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
+ *
+ * If we receive a SYN packet with these bits set, it means a
+ * network is playing bad games with TOS bits. In order to
+ * avoid possible false congestion notifications, we disable
+ * TCP ECN negociation.
+ *
+ * Exception: tcp_ca wants ECN. This is required for DCTCP
+ * congestion control; it requires setting ECT on all packets,
+ * including SYN. We inverse the test in this case: If our
+ * local socket wants ECN, but peer only set ece/cwr (but not
+ * ECT in IP header) its probably a non-DCTCP aware sender.
+ */
+static void tcp_ecn_create_request(struct request_sock *req,
+ const struct sk_buff *skb,
+ const struct sock *listen_sk)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ const struct net *net = sock_net(listen_sk);
+ bool th_ecn = th->ece && th->cwr;
+ bool ect, need_ecn;
+
+ if (!th_ecn)
+ return;
+
+ ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
+ need_ecn = tcp_ca_needs_ecn(listen_sk);
+
+ if (!ect && !need_ecn && net->ipv4.sysctl_tcp_ecn)
+ inet_rsk(req)->ecn_ok = 1;
+ else if (ect && need_ecn)
+ inet_rsk(req)->ecn_ok = 1;
+}
+
+int tcp_conn_request(struct request_sock_ops *rsk_ops,
+ const struct tcp_request_sock_ops *af_ops,
+ struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_options_received tmp_opt;
+ struct request_sock *req;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct dst_entry *dst = NULL;
+ __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
+ bool want_cookie = false, fastopen;
+ struct flowi fl;
+ struct tcp_fastopen_cookie foc = { .len = -1 };
+ int err;
+
+
+ /* TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+ if ((sysctl_tcp_syncookies == 2 ||
+ inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
+ if (!want_cookie)
+ goto drop;
+ }
+
+
+ /* Accept backlog is full. If we have already queued enough
+ * of warm entries in syn queue, drop request. It is better than
+ * clogging syn queue with openreqs with exponentially increasing
+ * timeout.
+ */
+ if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ goto drop;
+ }
+
+ req = inet_reqsk_alloc(rsk_ops);
+ if (!req)
+ goto drop;
+
+ tcp_rsk(req)->af_specific = af_ops;
+
+ tcp_clear_options(&tmp_opt);
+ tmp_opt.mss_clamp = af_ops->mss_clamp;
+ tmp_opt.user_mss = tp->rx_opt.user_mss;
+ tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
+
+ if (want_cookie && !tmp_opt.saw_tstamp)
+ tcp_clear_options(&tmp_opt);
+
+ tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+ tcp_openreq_init(req, &tmp_opt, skb, sk);
+
+ af_ops->init_req(req, sk, skb);
+
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
+ if (!want_cookie || tmp_opt.tstamp_ok)
+ tcp_ecn_create_request(req, skb, sk);
+
+ if (want_cookie) {
+ isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
+ req->cookie_ts = tmp_opt.tstamp_ok;
+ } else if (!isn) {
+ /* VJ's idea. We save last timestamp seen
+ * from the destination in peer table, when entering
+ * state TIME-WAIT, and check against it before
+ * accepting new connection request.
+ *
+ * If "isn" is not zero, this request hit alive
+ * timewait bucket, so that all the necessary checks
+ * are made in the function processing timewait state.
+ */
+ if (tcp_death_row.sysctl_tw_recycle) {
+ bool strict;
+
+ dst = af_ops->route_req(sk, &fl, req, &strict);
+
+ if (dst && strict &&
+ !tcp_peer_is_proven(req, dst, true,
+ tmp_opt.saw_tstamp)) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+ goto drop_and_release;
+ }
+ }
+ /* Kill the following clause, if you dislike this way. */
+ else if (!sysctl_tcp_syncookies &&
+ (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (sysctl_max_syn_backlog >> 2)) &&
+ !tcp_peer_is_proven(req, dst, false,
+ tmp_opt.saw_tstamp)) {
+ /* Without syncookies last quarter of
+ * backlog is filled with destinations,
+ * proven to be alive.
+ * It means that we continue to communicate
+ * to destinations, already remembered
+ * to the moment of synflood.
+ */
+ pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
+ rsk_ops->family);
+ goto drop_and_release;
+ }
+
+ isn = af_ops->init_seq(skb);
+ }
+ if (!dst) {
+ dst = af_ops->route_req(sk, &fl, req, NULL);
+ if (!dst)
+ goto drop_and_free;
+ }
+
+ tcp_rsk(req)->snt_isn = isn;
+ tcp_openreq_init_rwin(req, sk, dst);
+ fastopen = !want_cookie &&
+ tcp_try_fastopen(sk, skb, req, &foc, dst);
+ err = af_ops->send_synack(sk, dst, &fl, req,
+ skb_get_queue_mapping(skb), &foc);
+ if (!fastopen) {
+ if (err || want_cookie)
+ goto drop_and_free;
+
+ tcp_rsk(req)->listener = NULL;
+ af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ }
+
+ return 0;
+
+drop_and_release:
+ dst_release(dst);
+drop_and_free:
+ reqsk_free(req);
+drop:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_conn_request);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77cccda1ad0c..552e87e3c269 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,7 +72,6 @@
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
-#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
@@ -90,7 +89,6 @@ int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);
-
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -99,7 +97,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr,
@@ -208,6 +206,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = usin->sin_port;
inet->inet_daddr = daddr;
+ inet_set_txhash(sk);
+
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
* It can be called through tcp_release_cb() if socket was owned by user
* at the time tcp_v4_err() was called to handle ICMP message.
*/
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
@@ -300,6 +300,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
}
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
@@ -342,11 +343,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
int err;
struct net *net = dev_net(icmp_skb->dev);
- if (icmp_skb->len < (iph->ihl << 2) + 8) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
- return;
- }
-
sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
iph->saddr, th->source, inet_iif(icmp_skb));
if (!sk) {
@@ -433,15 +429,16 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
break;
icsk->icsk_backoff--;
- inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
- TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
- tcp_bound_rto(sk);
+ icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
+ TCP_TIMEOUT_INIT;
+ icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
skb = tcp_write_queue_head(sk);
BUG_ON(!skb);
- remaining = icsk->icsk_rto - min(icsk->icsk_rto,
- tcp_time_stamp - TCP_SKB_CB(skb)->when);
+ remaining = icsk->icsk_rto -
+ min(icsk->icsk_rto,
+ tcp_time_stamp - tcp_skb_timestamp(skb));
if (remaining) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -683,8 +680,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
arg.tos = ip_hdr(skb)->tos;
- ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+ ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -766,8 +764,9 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
- ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
+ ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
@@ -814,6 +813,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
* socket.
*/
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
+ struct flowi *fl,
struct request_sock *req,
u16 queue_mapping,
struct tcp_fastopen_cookie *foc)
@@ -837,24 +837,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
ireq->ir_rmt_addr,
ireq->opt);
err = net_xmit_eval(err);
- if (!tcp_rsk(req)->snt_synack && !err)
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
}
return err;
}
-static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
-{
- int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
-
- if (!res) {
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
- }
- return res;
-}
-
/*
* IPv4 request_sock destructor.
*/
@@ -898,18 +885,16 @@ EXPORT_SYMBOL(tcp_syn_flood_action);
*/
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
- const struct ip_options *opt = &(IPCB(skb)->opt);
+ const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
struct ip_options_rcu *dopt = NULL;
if (opt && opt->optlen) {
int opt_size = sizeof(*dopt) + opt->optlen;
dopt = kmalloc(opt_size, GFP_ATOMIC);
- if (dopt) {
- if (ip_options_echo(&dopt->opt, skb)) {
- kfree(dopt);
- dopt = NULL;
- }
+ if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
+ kfree(dopt);
+ dopt = NULL;
}
}
return dopt;
@@ -1064,7 +1049,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
if (sin->sin_family != AF_INET)
return -EINVAL;
- if (!cmd.tcpm_key || !cmd.tcpm_keylen)
+ if (!cmd.tcpm_keylen)
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
AF_INET);
@@ -1182,7 +1167,8 @@ clear_hash_noput:
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
-static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
+ const struct sk_buff *skb)
{
/*
* This gets called for each TCP segment that arrives
@@ -1235,163 +1221,81 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
return false;
}
+static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+ bool ret;
+
+ rcu_read_lock();
+ ret = __tcp_v4_inbound_md5_hash(sk, skb);
+ rcu_read_unlock();
+
+ return ret;
+}
+
#endif
+static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+ ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+ ireq->no_srccheck = inet_sk(sk)->transparent;
+ ireq->opt = tcp_v4_save_options(skb);
+}
+
+static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
+ const struct request_sock *req,
+ bool *strict)
+{
+ struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
+
+ if (strict) {
+ if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
+ *strict = true;
+ else
+ *strict = false;
+ }
+
+ return dst;
+}
+
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.family = PF_INET,
.obj_size = sizeof(struct tcp_request_sock),
- .rtx_syn_ack = tcp_v4_rtx_synack,
+ .rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v4_reqsk_send_ack,
.destructor = tcp_v4_reqsk_destructor,
.send_reset = tcp_v4_send_reset,
- .syn_ack_timeout = tcp_syn_ack_timeout,
+ .syn_ack_timeout = tcp_syn_ack_timeout,
};
-#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+ .mss_clamp = TCP_MSS_DEFAULT,
+#ifdef CONFIG_TCP_MD5SIG
.md5_lookup = tcp_v4_reqsk_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
-};
#endif
+ .init_req = tcp_v4_init_req,
+#ifdef CONFIG_SYN_COOKIES
+ .cookie_init_seq = cookie_v4_init_sequence,
+#endif
+ .route_req = tcp_v4_route_req,
+ .init_seq = tcp_v4_init_sequence,
+ .send_synack = tcp_v4_send_synack,
+ .queue_hash_add = inet_csk_reqsk_queue_hash_add,
+};
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_options_received tmp_opt;
- struct request_sock *req;
- struct inet_request_sock *ireq;
- struct tcp_sock *tp = tcp_sk(sk);
- struct dst_entry *dst = NULL;
- __be32 saddr = ip_hdr(skb)->saddr;
- __be32 daddr = ip_hdr(skb)->daddr;
- __u32 isn = TCP_SKB_CB(skb)->when;
- bool want_cookie = false, fastopen;
- struct flowi4 fl4;
- struct tcp_fastopen_cookie foc = { .len = -1 };
- int err;
-
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- if ((sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
- if (!want_cookie)
- goto drop;
- }
+ return tcp_conn_request(&tcp_request_sock_ops,
+ &tcp_request_sock_ipv4_ops, sk, skb);
- /* Accept backlog is full. If we have already queued enough
- * of warm entries in syn queue, drop request. It is better than
- * clogging syn queue with openreqs with exponentially increasing
- * timeout.
- */
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
- goto drop;
- }
-
- req = inet_reqsk_alloc(&tcp_request_sock_ops);
- if (!req)
- goto drop;
-
-#ifdef CONFIG_TCP_MD5SIG
- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
-#endif
-
- tcp_clear_options(&tmp_opt);
- tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
- tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
-
- if (want_cookie && !tmp_opt.saw_tstamp)
- tcp_clear_options(&tmp_opt);
-
- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
- tcp_openreq_init(req, &tmp_opt, skb);
-
- ireq = inet_rsk(req);
- ireq->ir_loc_addr = daddr;
- ireq->ir_rmt_addr = saddr;
- ireq->no_srccheck = inet_sk(sk)->transparent;
- ireq->opt = tcp_v4_save_options(skb);
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
- if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, skb, sock_net(sk));
-
- if (want_cookie) {
- isn = cookie_v4_init_sequence(sk, skb, &req->mss);
- req->cookie_ts = tmp_opt.tstamp_ok;
- } else if (!isn) {
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (tmp_opt.saw_tstamp &&
- tcp_death_row.sysctl_tw_recycle &&
- (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
- fl4.daddr == saddr) {
- if (!tcp_peer_is_proven(req, dst, true)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
- /* Kill the following clause, if you dislike this way. */
- else if (!sysctl_tcp_syncookies &&
- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false)) {
- /* Without syncookies last quarter of
- * backlog is filled with destinations,
- * proven to be alive.
- * It means that we continue to communicate
- * to destinations, already remembered
- * to the moment of synflood.
- */
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
- &saddr, ntohs(tcp_hdr(skb)->source));
- goto drop_and_release;
- }
-
- isn = tcp_v4_init_sequence(skb);
- }
- if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
- goto drop_and_free;
-
- tcp_rsk(req)->snt_isn = isn;
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_openreq_init_rwin(req, sk, dst);
- fastopen = !want_cookie &&
- tcp_try_fastopen(sk, skb, req, &foc, dst);
- err = tcp_v4_send_synack(sk, dst, req,
- skb_get_queue_mapping(skb), &foc);
- if (!fastopen) {
- if (err || want_cookie)
- goto drop_and_free;
-
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_rsk(req)->listener = NULL;
- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- }
-
- return 0;
-
-drop_and_release:
- dst_release(dst);
-drop_and_free:
- reqsk_free(req);
drop:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0;
@@ -1439,6 +1343,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
+ inet_set_txhash(newsk);
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
@@ -1523,7 +1428,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_SYN_COOKIES
if (!th->syn)
- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
+ sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4.opt);
#endif
return sk;
}
@@ -1539,16 +1444,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sock *rsk;
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * We really want to reject the packet as early as possible
- * if:
- * o We're expecting an MD5'd packet and this is no MD5 tcp option
- * o There is an MD5 option and we're not expecting one
- */
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard;
-#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst = sk->sk_rx_dst;
@@ -1663,7 +1558,17 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
skb_queue_len(&tp->ucopy.prequeue) == 0)
return false;
- skb_dst_force(skb);
+ /* Before escaping RCU protected region, we need to take care of skb
+ * dst. Prequeue is only enabled for established sockets.
+ * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
+ * Instead of doing full sk_rx_dst validity here, let's perform
+ * an optimistic check.
+ */
+ if (likely(sk->sk_rx_dst))
+ skb_dst_drop(skb);
+ else
+ skb_dst_force(skb);
+
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
if (tp->ucopy.memory > sk->sk_rcvbuf) {
@@ -1728,11 +1633,19 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
iph = ip_hdr(skb);
+ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+ * barrier() makes sure compiler wont play fool^Waliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+ sizeof(struct inet_skb_parm));
+ barrier();
+
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
- TCP_SKB_CB(skb)->when = 0;
+ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
@@ -1751,6 +1664,18 @@ process:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
+
+#ifdef CONFIG_TCP_MD5SIG
+ /*
+ * We really want to reject the packet as early as possible
+ * if:
+ * o We're expecting an MD5'd packet and this is no MD5 tcp option
+ * o There is an MD5 option and we're not expecting one
+ */
+ if (tcp_v4_inbound_md5_hash(sk, skb))
+ goto discard_and_relse;
+#endif
+
nf_reset(skb);
if (sk_filter(sk, skb))
@@ -1762,18 +1687,8 @@ process:
bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
- struct tcp_sock *tp = tcp_sk(sk);
- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = net_dma_find_channel();
- if (tp->ucopy.dma_chan)
+ if (!tcp_prequeue(sk, skb))
ret = tcp_v4_do_rcv(sk, skb);
- else
-#endif
- {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v4_do_rcv(sk, skb);
- }
} else if (unlikely(sk_add_backlog(sk, skb,
sk->sk_rcvbuf + sk->sk_sndbuf))) {
bh_unlock_sock(sk);
@@ -1857,9 +1772,11 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- dst_hold(dst);
- sk->sk_rx_dst = dst;
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ if (dst) {
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ }
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);
@@ -1880,6 +1797,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);
@@ -1932,11 +1850,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
}
#endif
-#ifdef CONFIG_NET_DMA
- /* Cleans up our sk_async_wait_queue */
- __skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
-
/* Clean prequeue, it must be empty really */
__skb_queue_purge(&tp->ucopy.prequeue);
@@ -2274,7 +2187,7 @@ int tcp_seq_open(struct inode *inode, struct file *file)
s = ((struct seq_file *)file->private_data)->private;
s->family = afinfo->family;
- s->last_pos = 0;
+ s->last_pos = 0;
return 0;
}
EXPORT_SYMBOL(tcp_seq_open);
@@ -2499,7 +2412,6 @@ struct proto tcp_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v4_mtu_reduced,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index f7a2ec3ac584..1d191357bf88 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
res_parent = &parent_cg->memory_allocated;
res_counter_init(&cg_proto->memory_allocated, res_parent);
- percpu_counter_init(&cg_proto->sockets_allocated, 0);
+ percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
return 0;
}
@@ -222,7 +222,7 @@ static struct cftype tcp_files[] = {
static int __init tcp_memcontrol_init(void)
{
- WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files));
+ WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
return 0;
}
__initcall(tcp_memcontrol_init);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4fe041805989..ed9c9a91851c 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -576,7 +576,8 @@ reset:
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
+ bool paws_check, bool timestamps)
{
struct tcp_metrics_block *tm;
bool ret;
@@ -589,7 +590,8 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool pa
if (paws_check) {
if (tm &&
(u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
- (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW)
+ ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
+ !timestamps))
ret = false;
else
ret = true;
@@ -1093,7 +1095,6 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
.doit = tcp_metrics_nl_cmd_get,
.dumpit = tcp_metrics_nl_dump,
.policy = tcp_metrics_nl_policy,
- .flags = GENL_ADMIN_PERM,
},
{
.cmd = TCP_METRICS_CMD_DEL,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e68e0d4af6c9..63d2680b65db 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -232,7 +232,7 @@ kill:
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
- TCP_SKB_CB(skb)->when = isn;
+ TCP_SKB_CB(skb)->tcp_tw_isn = isn;
return TCP_TW_SYN;
}
@@ -298,7 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_flowlabel = np->flow_label >> 12;
- tw->tw_ipv6only = np->ipv6only;
+ tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif
@@ -393,8 +393,8 @@ void tcp_openreq_init_rwin(struct request_sock *req,
}
EXPORT_SYMBOL(tcp_openreq_init_rwin);
-static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
- struct request_sock *req)
+static void tcp_ecn_openreq_child(struct tcp_sock *tp,
+ const struct request_sock *req)
{
tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
}
@@ -451,9 +451,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
- if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
- !try_module_get(newicsk->icsk_ca_ops->owner))
- newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ if (!try_module_get(newicsk->icsk_ca_ops->owner))
+ tcp_assign_congestion_control(newsk);
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
@@ -508,7 +507,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
- TCP_ECN_openreq_child(newtp, req);
+ tcp_ecn_openreq_child(newtp, req);
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 55046ecd083e..5b90f2f447a5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -14,6 +14,43 @@
#include <net/tcp.h>
#include <net/protocol.h>
+static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
+ unsigned int seq, unsigned int mss)
+{
+ while (skb) {
+ if (before(ts_seq, seq + mss)) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP;
+ skb_shinfo(skb)->tskey = ts_seq;
+ return;
+ }
+
+ skb = skb->next;
+ seq += mss;
+ }
+}
+
+struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
+ return ERR_PTR(-EINVAL);
+
+ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ const struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ /* Set up checksum pseudo header, usually expect stack to
+ * have done this already.
+ */
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
+ }
+
+ return tcp_gso_segment(skb, features);
+}
+
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -29,9 +66,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
__sum16 newcheck;
bool ooo_okay, copy_destructor;
- if (!pskb_may_pull(skb, sizeof(*th)))
- goto out;
-
th = tcp_hdr(skb);
thlen = th->doff * 4;
if (thlen < sizeof(*th))
@@ -91,6 +125,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th = tcp_hdr(skb);
seq = ntohl(th->seq);
+ if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
+ tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);
+
newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
@@ -251,54 +288,16 @@ int tcp_gro_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_gro_complete);
-static int tcp_v4_gso_send_check(struct sk_buff *skb)
-{
- const struct iphdr *iph;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- iph = ip_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
- return 0;
-}
-
static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
- /* Use the IP hdr immediately proceeding for this transport */
- const struct iphdr *iph = skb_gro_network_header(skb);
- __wsum wsum;
-
/* Don't bother verifying checksum if we're going to flush anyway. */
- if (NAPI_GRO_CB(skb)->flush)
- goto skip_csum;
-
- wsum = NAPI_GRO_CB(skb)->csum;
-
- switch (skb->ip_summed) {
- case CHECKSUM_NONE:
- wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb),
- 0);
-
- /* fall through */
-
- case CHECKSUM_COMPLETE:
- if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
- wsum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-
+ if (!NAPI_GRO_CB(skb)->flush &&
+ skb_gro_checksum_validate(skb, IPPROTO_TCP,
+ inet_gro_compute_pseudo)) {
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
-skip_csum:
return tcp_gro_receive(head, skb);
}
@@ -316,8 +315,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
static const struct net_offload tcpv4_offload = {
.callbacks = {
- .gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_gso_segment,
+ .gso_segment = tcp4_gso_segment,
.gro_receive = tcp4_gro_receive,
.gro_complete = tcp4_gro_complete,
},
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 179b51e6bda3..8d4eac793700 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -318,36 +318,47 @@ static u16 tcp_select_window(struct sock *sk)
}
/* Packet ECN state for a SYN-ACK */
-static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
if (!(tp->ecn_flags & TCP_ECN_OK))
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
+ else if (tcp_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
}
/* Packet ECN state for a SYN. */
-static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
+static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
tp->ecn_flags = 0;
- if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+ tcp_ca_needs_ecn(sk)) {
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
+ if (tcp_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
}
}
-static __inline__ void
-TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
+static void
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
+ struct sock *sk)
{
- if (inet_rsk(req)->ecn_ok)
+ if (inet_rsk(req)->ecn_ok) {
th->ece = 1;
+ if (tcp_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
+ }
}
/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
* be sent.
*/
-static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
+static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
int tcp_header_len)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -362,7 +373,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
tcp_hdr(skb)->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
- } else {
+ } else if (!tcp_ca_needs_ecn(sk)) {
/* ACK or retransmitted segment: clear ECT|CE */
INET_ECN_dontxmit(sk);
}
@@ -384,7 +395,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
- shinfo->gso_segs = 1;
+ tcp_skb_pcount_set(skb, 1);
shinfo->gso_size = 0;
shinfo->gso_type = 0;
@@ -550,7 +561,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
opts->options |= OPTION_TS;
- opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
+ opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -618,7 +629,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
}
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = TCP_SKB_CB(skb)->when;
+ opts->tsval = tcp_skb_timestamp(skb);
opts->tsecr = req->ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -647,7 +658,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5)
{
- struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
struct tcp_sock *tp = tcp_sk(sk);
unsigned int size = 0;
unsigned int eff_sacks;
@@ -666,7 +676,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
if (likely(tp->rx_opt.tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
+ opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
opts->tsecr = tp->rx_opt.ts_recent;
size += TCPOLEN_TSTAMP_ALIGNED;
}
@@ -800,7 +810,7 @@ void tcp_release_cb(struct sock *sk)
__sock_put(sk);
}
if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
- sk->sk_prot->mtu_reduced(sk);
+ inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
}
@@ -886,8 +896,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
- /* Our usage of tstamp should remain private */
- skb->tstamp.tv64 = 0;
}
inet = inet_sk(sk);
@@ -916,6 +924,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
skb->destructor = tcp_wfree;
+ skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
@@ -951,7 +960,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_options_write((__be32 *)(th + 1), tp, &opts);
if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
- TCP_ECN_send(sk, skb, tcp_header_size);
+ tcp_ecn_send(sk, skb, tcp_header_size);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
@@ -974,11 +983,22 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
+ /* OK, its time to fill skb_shinfo(skb)->gso_segs */
+ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
+
+ /* Our usage of tstamp should remain private */
+ skb->tstamp.tv64 = 0;
+
+ /* Cleanup our debris for IP stacks */
+ memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
+ sizeof(struct inet6_skb_parm)));
+
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
+
if (likely(err <= 0))
return err;
- tcp_enter_cwr(sk, 1);
+ tcp_enter_cwr(sk);
return net_xmit_eval(err);
}
@@ -994,7 +1014,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Advance write_seq and place onto the write_queue. */
tp->write_seq = TCP_SKB_CB(skb)->end_seq;
- skb_header_release(skb);
+ __skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
@@ -1013,11 +1033,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
/* Avoid the costly divide in the normal
* non-TSO case.
*/
- shinfo->gso_segs = 1;
+ tcp_skb_pcount_set(skb, 1);
shinfo->gso_size = 0;
shinfo->gso_type = 0;
} else {
- shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
+ tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
shinfo->gso_size = mss_now;
shinfo->gso_type = sk->sk_gso_type;
}
@@ -1068,6 +1088,21 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
tcp_verify_left_out(tp);
}
+static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
+ !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
+ struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
+ u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
+
+ shinfo->tx_flags &= ~tsflags;
+ shinfo2->tx_flags |= tsflags;
+ swap(shinfo->tskey, shinfo2->tskey);
+ }
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
@@ -1130,11 +1165,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
buff->ip_summed = skb->ip_summed;
- /* Looks stupid, but our code really uses when of
- * skbs, which it never sent before. --ANK
- */
- TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
buff->tstamp = skb->tstamp;
+ tcp_fragment_tstamp(skb, buff);
old_factor = tcp_skb_pcount(skb);
@@ -1154,7 +1186,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
}
/* Link BUFF into the send queue. */
- skb_header_release(buff);
+ __skb_header_release(buff);
tcp_insert_write_queue_after(skb, buff, sk);
return 0;
@@ -1651,13 +1683,14 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
+ tcp_fragment_tstamp(skb, buff);
/* Fix up tso_factor for both original and new SKB. */
tcp_set_skb_tso_segs(sk, skb, mss_now);
tcp_set_skb_tso_segs(sk, buff, mss_now);
/* Link BUFF into the send queue. */
- skb_header_release(buff);
+ __skb_header_release(buff);
tcp_insert_write_queue_after(skb, buff, sk);
return 0;
@@ -1856,8 +1889,8 @@ static int tcp_mtu_probe(struct sock *sk)
tcp_init_tso_segs(sk, nskb, nskb->len);
/* We're ready to send. If this fails, the probe will
- * be resegmented into mss-sized pieces by tcp_write_xmit(). */
- TCP_SKB_CB(nskb)->when = tcp_time_stamp;
+ * be resegmented into mss-sized pieces by tcp_write_xmit().
+ */
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
/* Decrement cwnd here because we are sending
* effectively two packets. */
@@ -1916,8 +1949,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
- if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
+ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+ /* "skb_mstamp" is used as a start point for the retransmit timer */
+ skb_mstamp_get(&skb->skb_mstamp);
goto repair; /* Skip network transmission */
+ }
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
@@ -1979,8 +2015,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
break;
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
@@ -2076,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
static bool skb_still_in_host_queue(const struct sock *sk,
const struct sk_buff *skb)
{
- const struct sk_buff *fclone = skb + 1;
-
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE)) {
+ if (unlikely(skb_fclone_busy(skb))) {
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
return true;
@@ -2478,7 +2509,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
/* Make a copy, if the first transmission SKB clone we made
* is still in somebody's hands, else make a clone.
*/
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
/* make sure skb->data is aligned on arches that require it
* and check if ack-trimming & collapsing extended the headroom
@@ -2523,7 +2553,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
/* Save stamp of the first retransmit. */
if (!tp->retrans_stamp)
- tp->retrans_stamp = TCP_SKB_CB(skb)->when;
+ tp->retrans_stamp = tcp_skb_timestamp(skb);
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
@@ -2731,7 +2761,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
TCPHDR_ACK | TCPHDR_RST);
/* Send it off. */
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
@@ -2759,7 +2788,7 @@ int tcp_send_synack(struct sock *sk)
if (nskb == NULL)
return -ENOMEM;
tcp_unlink_write_queue(skb, sk);
- skb_header_release(nskb);
+ __skb_header_release(nskb);
__tcp_add_write_queue_head(sk, nskb);
sk_wmem_free_skb(sk, skb);
sk->sk_wmem_queued += nskb->truesize;
@@ -2768,9 +2797,8 @@ int tcp_send_synack(struct sock *sk)
}
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
- TCP_ECN_send_synack(tcp_sk(sk), skb);
+ tcp_ecn_send_synack(sk, skb);
}
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
@@ -2814,10 +2842,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
- TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
+ skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
else
#endif
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
+ skb_mstamp_get(&skb->skb_mstamp);
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
foc) + sizeof(*th);
@@ -2828,7 +2856,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
- TCP_ECN_make_synack(req, th);
+ tcp_ecn_make_synack(req, th, sk);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
@@ -2935,7 +2963,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
tcb->end_seq += skb->len;
- skb_header_release(skb);
+ __skb_header_release(skb);
__tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
@@ -3065,9 +3093,9 @@ int tcp_connect(struct sock *sk)
skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
- tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
+ tp->retrans_stamp = tcp_time_stamp;
tcp_connect_queue_skb(sk, buff);
- TCP_ECN_send_syn(sk, buff);
+ tcp_ecn_send_syn(sk, buff);
/* Send off SYN; include data in Fast Open. */
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3099,6 +3127,8 @@ void tcp_send_delayed_ack(struct sock *sk)
int ato = icsk->icsk_ack.ato;
unsigned long timeout;
+ tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
+
if (ato > TCP_DELACK_MIN) {
const struct tcp_sock *tp = tcp_sk(sk);
int max_ato = HZ / 2;
@@ -3155,6 +3185,8 @@ void tcp_send_ack(struct sock *sk)
if (sk->sk_state == TCP_CLOSE)
return;
+ tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
+
/* We are not putting this on the write queue, so
* tcp_transmit_skb() will set the ownership to this
* sock.
@@ -3173,9 +3205,10 @@ void tcp_send_ack(struct sock *sk)
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
/* Send it off, this clears delayed acks for us. */
- TCP_SKB_CB(buff)->when = tcp_time_stamp;
+ skb_mstamp_get(&buff->skb_mstamp);
tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
}
+EXPORT_SYMBOL_GPL(tcp_send_ack);
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.
@@ -3205,7 +3238,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
* send it.
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
+ skb_mstamp_get(&skb->skb_mstamp);
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
@@ -3249,7 +3282,6 @@ int tcp_write_wakeup(struct sock *sk)
tcp_set_skb_tso_segs(sk, skb, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err)
tcp_event_new_data_sent(sk, skb);
@@ -3268,6 +3300,7 @@ void tcp_send_probe0(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long probe_max;
int err;
err = tcp_write_wakeup(sk);
@@ -3283,9 +3316,7 @@ void tcp_send_probe0(struct sock *sk)
if (icsk->icsk_backoff < sysctl_tcp_retries2)
icsk->icsk_backoff++;
icsk->icsk_probes_out++;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
- TCP_RTO_MAX);
+ probe_max = TCP_RTO_MAX;
} else {
/* If packet was not sent due to local congestion,
* do not backoff and do not remember icsk_probes_out.
@@ -3295,9 +3326,24 @@ void tcp_send_probe0(struct sock *sk)
*/
if (!icsk->icsk_probes_out)
icsk->icsk_probes_out = 1;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- min(icsk->icsk_rto << icsk->icsk_backoff,
- TCP_RESOURCE_PROBE_INTERVAL),
- TCP_RTO_MAX);
+ probe_max = TCP_RESOURCE_PROBE_INTERVAL;
+ }
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+ inet_csk_rto_backoff(icsk, probe_max),
+ TCP_RTO_MAX);
+}
+
+int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
+{
+ const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
+ struct flowi fl;
+ int res;
+
+ res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
+ if (!res) {
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
}
+ return res;
}
+EXPORT_SYMBOL(tcp_rtx_synack);
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 3b66610d4156..ebf5ff57526e 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -83,7 +83,6 @@ static struct {
struct tcp_log *log;
} tcp_probe;
-
static inline int tcp_probe_used(void)
{
return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1);
@@ -101,7 +100,6 @@ static inline int tcp_probe_avail(void)
si4.sin_addr.s_addr = inet->inet_##mem##addr; \
} while (0) \
-
/*
* Hook inserted to be called before each receive packet.
* Note: arguments must match tcp_rcv_established()!
@@ -194,8 +192,8 @@ static int tcpprobe_sprint(char *tbuf, int n)
return scnprintf(tbuf, n,
"%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
- (unsigned long) tv.tv_sec,
- (unsigned long) tv.tv_nsec,
+ (unsigned long)tv.tv_sec,
+ (unsigned long)tv.tv_nsec,
&p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 8250949b8853..6824afb65d93 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -31,10 +31,10 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+
return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
}
-
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
.ssthresh = tcp_scalable_ssthresh,
.cong_avoid = tcp_scalable_cong_avoid,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 286227abed10..9b21ae8b2e31 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -52,7 +52,7 @@ static void tcp_write_err(struct sock *sk)
* limit.
* 2. If we have strong memory pressure.
*/
-static int tcp_out_of_resources(struct sock *sk, int do_reset)
+static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
struct tcp_sock *tp = tcp_sk(sk);
int shift = 0;
@@ -72,7 +72,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
(!tp->snd_wnd && !tp->packets_out))
- do_reset = 1;
+ do_reset = true;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
@@ -135,10 +135,9 @@ static bool retransmits_timed_out(struct sock *sk,
if (!inet_csk(sk)->icsk_retransmits)
return false;
- if (unlikely(!tcp_sk(sk)->retrans_stamp))
- start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when;
- else
- start_ts = tcp_sk(sk)->retrans_stamp;
+ start_ts = tcp_sk(sk)->retrans_stamp;
+ if (unlikely(!start_ts))
+ start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
if (likely(timeout == 0)) {
linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -181,7 +180,7 @@ static int tcp_write_timeout(struct sock *sk)
retry_until = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
- const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
+ const int alive = icsk->icsk_rto < TCP_RTO_MAX;
retry_until = tcp_orphan_retries(sk, alive);
do_reset = alive ||
@@ -271,40 +270,41 @@ static void tcp_probe_timer(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
+ u32 start_ts;
if (tp->packets_out || !tcp_send_head(sk)) {
icsk->icsk_probes_out = 0;
return;
}
- /* *WARNING* RFC 1122 forbids this
- *
- * It doesn't AFAIK, because we kill the retransmit timer -AK
- *
- * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
- * this behaviour in Solaris down as a bug fix. [AC]
- *
- * Let me to explain. icsk_probes_out is zeroed by incoming ACKs
- * even if they advertise zero window. Hence, connection is killed only
- * if we received no ACKs for normal connection timeout. It is not killed
- * only because window stays zero for some time, window may be zero
- * until armageddon and even later. We are in full accordance
- * with RFCs, only probe timer combines both retransmission timeout
- * and probe timeout in one bottle. --ANK
+ /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
+ * long as the receiver continues to respond probes. We support this by
+ * default and reset icsk_probes_out with incoming ACKs. But if the
+ * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
+ * kill the socket when the retry count and the time exceeds the
+ * corresponding system limit. We also implement similar policy when
+ * we use RTO to probe window in tcp_retransmit_timer().
*/
- max_probes = sysctl_tcp_retries2;
+ start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+ if (!start_ts)
+ skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
+ else if (icsk->icsk_user_timeout &&
+ (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
+ goto abort;
+ max_probes = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
- const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX);
+ const int alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
max_probes = tcp_orphan_retries(sk, alive);
-
- if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
+ if (!alive && icsk->icsk_backoff >= max_probes)
+ goto abort;
+ if (tcp_out_of_resources(sk, true))
return;
}
if (icsk->icsk_probes_out > max_probes) {
- tcp_write_err(sk);
+abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
tcp_send_probe0(sk);
@@ -391,7 +391,7 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_write_err(sk);
goto out;
}
- tcp_enter_loss(sk, 0);
+ tcp_enter_loss(sk);
tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
__sk_dst_reset(sk);
goto out_reset_timer;
@@ -422,7 +422,7 @@ void tcp_retransmit_timer(struct sock *sk)
NET_INC_STATS_BH(sock_net(sk), mib_idx);
}
- tcp_enter_loss(sk, 0);
+ tcp_enter_loss(sk);
if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
/* Retransmission failed because of local congestion,
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 9a5e05f27f4f..a6afde666ab1 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -51,7 +51,6 @@ MODULE_PARM_DESC(beta, "upper bound of packets in network");
module_param(gamma, int, 0644);
MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
-
/* There are several situations when we must "re-start" Vegas:
*
* o when a connection is established
@@ -133,7 +132,6 @@ EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
void tcp_vegas_state(struct sock *sk, u8 ca_state)
{
-
if (ca_state == TCP_CA_Open)
vegas_enable(sk);
else
@@ -218,7 +216,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* This is:
* (actual rate in segments) * baseRTT
*/
- target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt;
+ target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+ do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
@@ -284,7 +283,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* Use normal slow start */
else if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp, acked);
-
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 27b9825753d1..a4d2d2d88dca 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -144,7 +144,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rtt = veno->minrtt;
- target_cwnd = (tp->snd_cwnd * veno->basertt);
+ target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
target_cwnd <<= V_PARAM_SHIFT;
do_div(target_cwnd, rtt);
@@ -175,7 +175,6 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
} else
tp->snd_cwnd_cnt++;
}
-
}
if (tp->snd_cwnd < 2)
tp->snd_cwnd = 2;
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index b94a04ae2ed5..bb63fba47d47 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -42,7 +42,6 @@ struct westwood {
u8 reset_rtt_min; /* Reset RTT min to next RTT sample*/
};
-
/* TCP Westwood functions and constants */
#define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */
#define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */
@@ -153,7 +152,6 @@ static inline void update_rtt_min(struct westwood *w)
w->rtt_min = min(w->rtt, w->rtt_min);
}
-
/*
* @westwood_fast_bw
* It is called when we are in fast path. In particular it is called when
@@ -208,7 +206,6 @@ static inline u32 westwood_acked_count(struct sock *sk)
return w->cumul_ack;
}
-
/*
* TCP Westwood
* Here limit is evaluated as Bw estimation*RTTmin (for obtaining it
@@ -219,47 +216,51 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct westwood *w = inet_csk_ca(sk);
+
return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2);
}
+static void tcp_westwood_ack(struct sock *sk, u32 ack_flags)
+{
+ if (ack_flags & CA_ACK_SLOWPATH) {
+ struct westwood *w = inet_csk_ca(sk);
+
+ westwood_update_window(sk);
+ w->bk += westwood_acked_count(sk);
+
+ update_rtt_min(w);
+ return;
+ }
+
+ westwood_fast_bw(sk);
+}
+
static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
{
struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
switch (event) {
- case CA_EVENT_FAST_ACK:
- westwood_fast_bw(sk);
- break;
-
case CA_EVENT_COMPLETE_CWR:
tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
break;
-
case CA_EVENT_LOSS:
tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
/* Update RTT_min when next ack arrives */
w->reset_rtt_min = 1;
break;
-
- case CA_EVENT_SLOW_ACK:
- westwood_update_window(sk);
- w->bk += westwood_acked_count(sk);
- update_rtt_min(w);
- break;
-
default:
/* don't care */
break;
}
}
-
/* Extract info for Tcp socket info provided via netlink. */
static void tcp_westwood_info(struct sock *sk, u32 ext,
struct sk_buff *skb)
{
const struct westwood *ca = inet_csk_ca(sk);
+
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
struct tcpvegas_info info = {
.tcpv_enabled = 1,
@@ -271,12 +272,12 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
}
}
-
static struct tcp_congestion_ops tcp_westwood __read_mostly = {
.init = tcp_westwood_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
.cwnd_event = tcp_westwood_event,
+ .in_ack_event = tcp_westwood_ack,
.get_info = tcp_westwood_info,
.pkts_acked = tcp_westwood_pkts_acked,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 599b79b8eac0..cd7273218598 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -54,10 +54,8 @@ static void tcp_yeah_init(struct sock *sk)
/* Ensure the MD arithmetic works. This is somewhat pedantic,
* since I don't think we will see a cwnd this large. :) */
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
-
}
-
static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -84,7 +82,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* Scalable */
tp->snd_cwnd_cnt += yeah->pkts_acked;
- if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
+ if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
tp->snd_cwnd_cnt = 0;
@@ -120,7 +118,6 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
*/
if (after(ack, yeah->vegas.beg_snd_nxt)) {
-
/* We do the Vegas calculations only if we got enough RTT
* samples that we can be reasonably sure that we got
* at least one RTT sample that wasn't from a delayed ACK.
@@ -189,7 +186,6 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
yeah->lastQ = queue;
-
}
/* Save the extent of the current window so we can use this
@@ -205,7 +201,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
}
-static u32 tcp_yeah_ssthresh(struct sock *sk) {
+static u32 tcp_yeah_ssthresh(struct sock *sk)
+{
const struct tcp_sock *tp = tcp_sk(sk);
struct yeah *yeah = inet_csk_ca(sk);
u32 reduction;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7d5a8661df76..cd0db5471bb5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -99,6 +99,7 @@
#include <linux/slab.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
+#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
@@ -224,7 +225,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
remaining = (high - low) + 1;
rand = prandom_u32();
- first = (((u64)rand * remaining) >> 32) + low;
+ first = reciprocal_scale(rand, remaining) + low;
/*
* force rand to be an odd multiple of UDP_HTABLE_SIZE
*/
@@ -448,7 +449,7 @@ begin:
}
} else if (score == badness && reuseport) {
matches++;
- if (((u64)hash * matches) >> 32 == 0)
+ if (reciprocal_scale(hash, matches) == 0)
result = sk;
hash = next_pseudo_random32(hash);
}
@@ -529,7 +530,7 @@ begin:
}
} else if (score == badness && reuseport) {
matches++;
- if (((u64)hash * matches) >> 32 == 0)
+ if (reciprocal_scale(hash, matches) == 0)
result = sk;
hash = next_pseudo_random32(hash);
}
@@ -594,27 +595,6 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
-static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
- __be16 loc_port, __be32 loc_addr,
- __be16 rmt_port, __be32 rmt_addr,
- int dif)
-{
- struct hlist_nulls_node *node;
- struct sock *s = sk;
- unsigned short hnum = ntohs(loc_port);
-
- sk_nulls_for_each_from(s, node) {
- if (__udp_is_mcast_sock(net, s,
- loc_port, loc_addr,
- rmt_port, rmt_addr,
- dif, hnum))
- goto found;
- }
- s = NULL;
-found:
- return s;
-}
-
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -1588,7 +1568,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
goto drop;
@@ -1640,6 +1620,8 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
skb1 = NULL;
+
+ sock_put(sk);
}
if (unlikely(skb1))
kfree_skb(skb1);
@@ -1668,41 +1650,50 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_table *udptable)
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
- struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
- int dif;
- unsigned int i, count = 0;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(uh->dest);
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
+ int dif = skb->dev->ifindex;
+ unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+
+ if (use_hash2) {
+ hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
+ udp_table.mask;
+ hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+start_lookup:
+ hslot = &udp_table.hash2[hash2];
+ offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
+ }
spin_lock(&hslot->lock);
- sk = sk_nulls_head(&hslot->head);
- dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
- while (sk) {
- stack[count++] = sk;
- sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
- daddr, uh->source, saddr, dif);
- if (unlikely(count == ARRAY_SIZE(stack))) {
- if (!sk)
- break;
- flush_stack(stack, count, skb, ~0);
- count = 0;
+ sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
+ if (__udp_is_mcast_sock(net, sk,
+ uh->dest, daddr,
+ uh->source, saddr,
+ dif, hnum)) {
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
+ }
+ stack[count++] = sk;
+ sock_hold(sk);
}
}
- /*
- * before releasing chain lock, we must take a reference on sockets
- */
- for (i = 0; i < count; i++)
- sock_hold(stack[i]);
spin_unlock(&hslot->lock);
+ /* Also lookup *:port if we are using hash2 and haven't done so yet. */
+ if (use_hash2 && hash2 != hash2_any) {
+ hash2 = hash2_any;
+ goto start_lookup;
+ }
+
/*
* do the slow work with no lock held
*/
if (count) {
flush_stack(stack, count, skb, count - 1);
-
- for (i = 0; i < count; i++)
- sock_put(stack[i]);
} else {
kfree_skb(skb);
}
@@ -1797,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;
+ if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
+ skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ inet_compute_pseudo);
+
ret = udp_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -1977,7 +1972,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
return;
skb->sk = sk;
- skb->destructor = sock_edemux;
+ skb->destructor = sock_efree;
dst = sk->sk_rx_dst;
if (dst)
@@ -2526,79 +2521,3 @@ void __init udp_init(void)
sysctl_udp_rmem_min = SK_MEM_QUANTUM;
sysctl_udp_wmem_min = SK_MEM_QUANTUM;
}
-
-struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- u16 mac_offset = skb->mac_header;
- int mac_len = skb->mac_len;
- int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
- __be16 protocol = skb->protocol;
- netdev_features_t enc_features;
- int udp_offset, outer_hlen;
- unsigned int oldlen;
- bool need_csum;
-
- oldlen = (u16)~skb->len;
-
- if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
- goto out;
-
- skb->encapsulation = 0;
- __skb_pull(skb, tnl_hlen);
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb_inner_network_offset(skb));
- skb->mac_len = skb_inner_network_offset(skb);
- skb->protocol = htons(ETH_P_TEB);
-
- need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
- if (need_csum)
- skb->encap_hdr_csum = 1;
-
- /* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
- segs = skb_mac_gso_segment(skb, enc_features);
- if (!segs || IS_ERR(segs)) {
- skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
- mac_len);
- goto out;
- }
-
- outer_hlen = skb_tnl_header_len(skb);
- udp_offset = outer_hlen - tnl_hlen;
- skb = segs;
- do {
- struct udphdr *uh;
- int len;
-
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
-
- skb->mac_len = mac_len;
-
- skb_push(skb, outer_hlen);
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, mac_len);
- skb_set_transport_header(skb, udp_offset);
- len = skb->len - udp_offset;
- uh = udp_hdr(skb);
- uh->len = htons(len);
-
- if (need_csum) {
- __be32 delta = htonl(oldlen + len);
-
- uh->check = ~csum_fold((__force __wsum)
- ((__force u32)uh->check +
- (__force u32)delta));
- uh->check = gso_make_checksum(skb, ~uh->check);
-
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- }
-
- skb->protocol = protocol;
- } while ((skb = skb->next));
-out:
- return segs;
-}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 546d2d439dda..507310ef4b56 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -25,26 +25,121 @@ struct udp_offload_priv {
struct udp_offload_priv __rcu *next;
};
-static int udp4_ufo_send_check(struct sk_buff *skb)
+static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features),
+ __be16 new_protocol)
{
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- return -EINVAL;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ u16 mac_offset = skb->mac_header;
+ int mac_len = skb->mac_len;
+ int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+ __be16 protocol = skb->protocol;
+ netdev_features_t enc_features;
+ int udp_offset, outer_hlen;
+ unsigned int oldlen;
+ bool need_csum;
+
+ oldlen = (u16)~skb->len;
+
+ if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
+ goto out;
+
+ skb->encapsulation = 0;
+ __skb_pull(skb, tnl_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, skb_inner_network_offset(skb));
+ skb->mac_len = skb_inner_network_offset(skb);
+ skb->protocol = new_protocol;
+
+ need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+ if (need_csum)
+ skb->encap_hdr_csum = 1;
+
+ /* segment inner packet. */
+ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ segs = gso_inner_segment(skb, enc_features);
+ if (IS_ERR_OR_NULL(segs)) {
+ skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
+ mac_len);
+ goto out;
+ }
- if (likely(!skb->encapsulation)) {
- const struct iphdr *iph;
+ outer_hlen = skb_tnl_header_len(skb);
+ udp_offset = outer_hlen - tnl_hlen;
+ skb = segs;
+ do {
struct udphdr *uh;
+ int len;
- iph = ip_hdr(skb);
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+
+ skb->mac_len = mac_len;
+
+ skb_push(skb, outer_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, mac_len);
+ skb_set_transport_header(skb, udp_offset);
+ len = skb->len - udp_offset;
uh = udp_hdr(skb);
+ uh->len = htons(len);
+
+ if (need_csum) {
+ __be32 delta = htonl(oldlen + len);
+
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
+ uh->check = gso_make_checksum(skb, ~uh->check);
+
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ }
- uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->protocol = protocol;
+ } while ((skb = skb->next));
+out:
+ return segs;
+}
+
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ bool is_ipv6)
+{
+ __be16 protocol = skb->protocol;
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features);
+
+ rcu_read_lock();
+
+ switch (skb->inner_protocol_type) {
+ case ENCAP_TYPE_ETHER:
+ protocol = skb->inner_protocol;
+ gso_inner_segment = skb_mac_gso_segment;
+ break;
+ case ENCAP_TYPE_IPPROTO:
+ offloads = is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[skb->inner_ipproto]);
+ if (!ops || !ops->callbacks.gso_segment)
+ goto out_unlock;
+ gso_inner_segment = ops->callbacks.gso_segment;
+ break;
+ default:
+ goto out_unlock;
}
- return 0;
+ segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
+ protocol);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return segs;
}
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
@@ -52,16 +147,20 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
- int offset;
__wsum csum;
+ struct udphdr *uh;
+ struct iphdr *iph;
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
- segs = skb_udp_tunnel_segment(skb, features);
+ segs = skb_udp_tunnel_segment(skb, features, false);
goto out;
}
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto out;
+
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
goto out;
@@ -89,10 +188,16 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
* HW cannot do checksum of UDP packets sent as multiple
* IP fragments.
*/
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+
+ uh = udp_hdr(skb);
+ iph = ip_hdr(skb);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
skb->ip_summed = CHECKSUM_NONE;
/* Fragment the skb. IP headers of the fragments are updated in
@@ -152,30 +257,24 @@ unlock:
}
EXPORT_SYMBOL(udp_del_offload);
-static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
+ struct udphdr *uh)
{
struct udp_offload_priv *uo_priv;
struct sk_buff *p, **pp = NULL;
- struct udphdr *uh, *uh2;
- unsigned int hlen, off;
+ struct udphdr *uh2;
+ unsigned int off = skb_gro_offset(skb);
int flush = 1;
if (NAPI_GRO_CB(skb)->udp_mark ||
- (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE))
+ (skb->ip_summed != CHECKSUM_PARTIAL &&
+ NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+ !NAPI_GRO_CB(skb)->csum_valid))
goto out;
/* mark that this skb passed once through the udp gro layer */
NAPI_GRO_CB(skb)->udp_mark = 1;
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*uh);
- uh = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- uh = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!uh))
- goto out;
- }
-
rcu_read_lock();
uo_priv = rcu_dereference(udp_offload_base);
for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
@@ -193,7 +292,12 @@ unflush:
continue;
uh2 = (struct udphdr *)(p->data + off);
- if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+
+ /* Match ports and either checksums are either both zero
+ * or nonzero.
+ */
+ if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) ||
+ (!uh->check ^ !uh2->check)) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
@@ -201,6 +305,7 @@ unflush:
skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+ NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
pp = uo_priv->offload->callbacks.gro_receive(head, skb);
out_unlock:
@@ -210,7 +315,34 @@ out:
return pp;
}
-static int udp_gro_complete(struct sk_buff *skb, int nhoff)
+static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_gro_udphdr(skb);
+
+ if (unlikely(!uh))
+ goto flush;
+
+ /* Don't bother verifying checksum if we're going to flush anyway. */
+ if (NAPI_GRO_CB(skb)->flush)
+ goto skip;
+
+ if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
+ inet_gro_compute_pseudo))
+ goto flush;
+ else if (uh->check)
+ skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ inet_gro_compute_pseudo);
+skip:
+ NAPI_GRO_CB(skb)->is_ipv6 = 0;
+ return udp_gro_receive(head, skb, uh);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+}
+
+int udp_gro_complete(struct sk_buff *skb, int nhoff)
{
struct udp_offload_priv *uo_priv;
__be16 newlen = htons(skb->len - nhoff);
@@ -228,19 +360,32 @@ static int udp_gro_complete(struct sk_buff *skb, int nhoff)
break;
}
- if (uo_priv != NULL)
+ if (uo_priv != NULL) {
+ NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
+ }
rcu_read_unlock();
return err;
}
+static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+
+ if (uh->check)
+ uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
+ iph->daddr, 0);
+
+ return udp_gro_complete(skb, nhoff);
+}
+
static const struct net_offload udpv4_offload = {
.callbacks = {
- .gso_send_check = udp4_ufo_send_check,
.gso_segment = udp4_ufo_fragment,
- .gro_receive = udp_gro_receive,
- .gro_complete = udp_gro_complete,
+ .gro_receive = udp4_gro_receive,
+ .gro_complete = udp4_gro_complete,
},
};
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
new file mode 100644
index 000000000000..1671263e5fa0
--- /dev/null
+++ b/net/ipv4/udp_tunnel.c
@@ -0,0 +1,108 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/net_namespace.h>
+
+int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp)
+{
+ int err;
+ struct socket *sock = NULL;
+ struct sockaddr_in udp_addr;
+
+ err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
+ if (err < 0)
+ goto error;
+
+ sk_change_net(sock->sk, net);
+
+ udp_addr.sin_family = AF_INET;
+ udp_addr.sin_addr = cfg->local_ip;
+ udp_addr.sin_port = cfg->local_udp_port;
+ err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
+ sizeof(udp_addr));
+ if (err < 0)
+ goto error;
+
+ if (cfg->peer_udp_port) {
+ udp_addr.sin_family = AF_INET;
+ udp_addr.sin_addr = cfg->peer_ip;
+ udp_addr.sin_port = cfg->peer_udp_port;
+ err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
+ sizeof(udp_addr), 0);
+ if (err < 0)
+ goto error;
+ }
+
+ sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
+
+ *sockp = sock;
+ return 0;
+
+error:
+ if (sock) {
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
+ }
+ *sockp = NULL;
+ return err;
+}
+EXPORT_SYMBOL(udp_sock_create4);
+
+void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+ struct udp_tunnel_sock_cfg *cfg)
+{
+ struct sock *sk = sock->sk;
+
+ /* Disable multicast loopback */
+ inet_sk(sk)->mc_loop = 0;
+
+ /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
+ udp_set_convert_csum(sk, true);
+
+ rcu_assign_sk_user_data(sk, cfg->sk_user_data);
+
+ udp_sk(sk)->encap_type = cfg->encap_type;
+ udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+ udp_sk(sk)->encap_destroy = cfg->encap_destroy;
+
+ udp_tunnel_encap_enable(sock);
+}
+EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
+
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+ struct sk_buff *skb, __be32 src, __be32 dst,
+ __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+ __be16 dst_port, bool xnet)
+{
+ struct udphdr *uh;
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+ uh->len = htons(skb->len);
+
+ udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
+
+ return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+ tos, ttl, df, xnet);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
+
+void udp_tunnel_sock_release(struct socket *sock)
+{
+ rcu_assign_sk_user_data(sock->sk, NULL);
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index a2ce0101eaac..dccefa9d84cf 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -124,7 +124,7 @@ static int xfrm4_ah_rcv(struct sk_buff *skb)
for_each_protocol_rcu(ah4_handlers, handler)
if ((ret = handler->handler(skb)) != -EINVAL)
- return ret;;
+ return ret;
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2fe68364bb20..2e8c06108ab9 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -45,3 +45,7 @@ obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
+
+ifneq ($(CONFIG_IPV6),)
+obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
+endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5667b3003af9..725c763270a0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -108,11 +108,12 @@ static inline u32 cstamp_delta(unsigned long cstamp)
}
#ifdef CONFIG_SYSCTL
-static void addrconf_sysctl_register(struct inet6_dev *idev);
+static int addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
#else
-static inline void addrconf_sysctl_register(struct inet6_dev *idev)
+static inline int addrconf_sysctl_register(struct inet6_dev *idev)
{
+ return 0;
}
static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
@@ -179,13 +180,14 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
- .use_tempaddr = 0,
+ .use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .accept_ra_from_local = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -222,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .accept_ra_from_local = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -308,16 +311,16 @@ err_ip:
static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
+ int err = -ENOMEM;
ASSERT_RTNL();
if (dev->mtu < IPV6_MIN_MTU)
- return NULL;
+ return ERR_PTR(-EINVAL);
ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
-
if (ndev == NULL)
- return NULL;
+ return ERR_PTR(err);
rwlock_init(&ndev->lock);
ndev->dev = dev;
@@ -330,7 +333,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (ndev->nd_parms == NULL) {
kfree(ndev);
- return NULL;
+ return ERR_PTR(err);
}
if (ndev->cnf.forwarding)
dev_disable_lro(dev);
@@ -344,17 +347,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
- return NULL;
+ return ERR_PTR(err);
}
if (snmp6_register_dev(ndev) < 0) {
ADBG(KERN_WARNING
"%s: cannot create /proc/net/dev_snmp6/%s\n",
__func__, dev->name);
- neigh_parms_release(&nd_tbl, ndev->nd_parms);
- ndev->dead = 1;
- in6_dev_finish_destroy(ndev);
- return NULL;
+ goto err_release;
}
/* One reference from device. We must do this before
@@ -392,7 +392,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
- addrconf_sysctl_register(ndev);
+ err = addrconf_sysctl_register(ndev);
+ if (err) {
+ ipv6_mc_destroy_dev(ndev);
+ del_timer(&ndev->regen_timer);
+ goto err_release;
+ }
/* protected by rtnl_lock */
rcu_assign_pointer(dev->ip6_ptr, ndev);
@@ -407,6 +412,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
return ndev;
+
+err_release:
+ neigh_parms_release(&nd_tbl, ndev->nd_parms);
+ ndev->dead = 1;
+ in6_dev_finish_destroy(ndev);
+ return ERR_PTR(err);
}
static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
@@ -418,7 +429,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
idev = __in6_dev_get(dev);
if (!idev) {
idev = ipv6_add_dev(dev);
- if (!idev)
+ if (IS_ERR(idev))
return NULL;
}
@@ -1094,8 +1105,8 @@ retry:
spin_unlock_bh(&ifp->lock);
regen_advance = idev->cnf.regen_max_retry *
- idev->cnf.dad_transmits *
- NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
+ idev->cnf.dad_transmits *
+ NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
write_unlock_bh(&idev->lock);
/* A temporary address is created only if this calculated Preferred
@@ -1679,14 +1690,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
addrconf_mod_dad_work(ifp, 0);
}
-/* Join to solicited addr multicast group. */
-
+/* Join to solicited addr multicast group.
+ * caller must hold RTNL */
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
- ASSERT_RTNL();
-
if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1694,12 +1703,11 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
ipv6_dev_mc_inc(dev, &maddr);
}
+/* caller must hold RTNL */
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;
- ASSERT_RTNL();
-
if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1707,26 +1715,24 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
__ipv6_dev_mc_dec(idev, &maddr);
}
+/* caller must hold RTNL */
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
- ASSERT_RTNL();
-
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
return;
- ipv6_dev_ac_inc(ifp->idev->dev, &addr);
+ __ipv6_dev_ac_inc(ifp->idev, &addr);
}
+/* caller must hold RTNL */
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
- ASSERT_RTNL();
-
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2728,9 +2734,25 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
}
}
+static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
+{
+ if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
+ struct in6_addr addr;
+
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ /* addrconf_add_linklocal also adds a prefix_route and we
+ * only need to care about prefix routes if ipv6_generate_eui64
+ * couldn't generate one.
+ */
+ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
+ addrconf_add_linklocal(idev, &addr);
+ else if (prefix_route)
+ addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+ }
+}
+
static void addrconf_dev_config(struct net_device *dev)
{
- struct in6_addr addr;
struct inet6_dev *idev;
ASSERT_RTNL();
@@ -2751,11 +2773,7 @@ static void addrconf_dev_config(struct net_device *dev)
if (IS_ERR(idev))
return;
- memset(&addr, 0, sizeof(struct in6_addr));
- addr.s6_addr32[0] = htonl(0xFE800000);
-
- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
- addrconf_add_linklocal(idev, &addr);
+ addrconf_addr_gen(idev, false);
}
#if IS_ENABLED(CONFIG_IPV6_SIT)
@@ -2777,11 +2795,7 @@ static void addrconf_sit_config(struct net_device *dev)
}
if (dev->priv_flags & IFF_ISATAP) {
- struct in6_addr addr;
-
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
- addrconf_add_linklocal(idev, &addr);
+ addrconf_addr_gen(idev, false);
return;
}
@@ -2796,7 +2810,6 @@ static void addrconf_sit_config(struct net_device *dev)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
- struct in6_addr addr;
ASSERT_RTNL();
@@ -2805,11 +2818,7 @@ static void addrconf_gre_config(struct net_device *dev)
return;
}
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
- addrconf_add_linklocal(idev, &addr);
- else
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
+ addrconf_addr_gen(idev, true);
}
#endif
@@ -2825,8 +2834,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
case NETDEV_REGISTER:
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
idev = ipv6_add_dev(dev);
- if (!idev)
- return notifier_from_errno(-ENOMEM);
+ if (IS_ERR(idev))
+ return notifier_from_errno(PTR_ERR(idev));
}
break;
@@ -2835,6 +2844,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (dev->flags & IFF_SLAVE)
break;
+ if (idev && idev->cnf.disable_ipv6)
+ break;
+
if (event == NETDEV_UP) {
if (!addrconf_qdisc_ok(dev)) {
/* device is not ready yet. */
@@ -2846,7 +2858,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!idev && dev->mtu >= IPV6_MIN_MTU)
idev = ipv6_add_dev(dev);
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
idev->if_flags |= IF_READY;
run_pending = 1;
}
@@ -2889,7 +2901,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
addrconf_dad_run(idev);
@@ -2924,7 +2936,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
idev = ipv6_add_dev(dev);
- if (idev)
+ if (!IS_ERR(idev))
break;
}
@@ -2945,10 +2957,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev) {
snmp6_unregister_dev(idev);
addrconf_sysctl_unregister(idev);
- addrconf_sysctl_register(idev);
- err = snmp6_register_dev(idev);
+ err = addrconf_sysctl_register(idev);
if (err)
return notifier_from_errno(err);
+ err = snmp6_register_dev(idev);
+ if (err) {
+ addrconf_sysctl_unregister(idev);
+ return notifier_from_errno(err);
+ }
}
break;
@@ -3017,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct hlist_head *h = &inet6_addr_lst[i];
spin_lock_bh(&addrconf_hash_lock);
- restart:
+restart:
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
@@ -3081,11 +3097,13 @@ static int addrconf_ifdown(struct net_device *dev, int how)
write_unlock_bh(&idev->lock);
- /* Step 5: Discard multicast list */
- if (how)
+ /* Step 5: Discard anycast and multicast list */
+ if (how) {
+ ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
- else
+ } else {
ipv6_mc_down(idev);
+ }
idev->tstamp = jiffies;
@@ -3529,8 +3547,8 @@ static void __net_exit if6_proc_net_exit(struct net *net)
}
static struct pernet_operations if6_proc_net_ops = {
- .init = if6_proc_net_init,
- .exit = if6_proc_net_exit,
+ .init = if6_proc_net_init,
+ .exit = if6_proc_net_exit,
};
int __init if6_proc_init(void)
@@ -4321,6 +4339,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
+ array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
}
static inline size_t inet6_ifla6_size(void)
@@ -4420,6 +4439,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (nla == NULL)
goto nla_put_failure;
+
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode))
+ goto nla_put_failure;
+
read_lock_bh(&idev->lock);
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
@@ -4524,8 +4547,21 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
BUG();
- if (tb[IFLA_INET6_TOKEN])
+ if (tb[IFLA_INET6_TOKEN]) {
err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
+ u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
+
+ if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
+ mode != IN6_ADDR_GEN_MODE_NONE)
+ return -EINVAL;
+ idev->addr_gen_mode = mode;
+ err = 0;
+ }
return err;
}
@@ -4737,24 +4773,21 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
addrconf_leave_solict(ifp->idev, &ifp->addr);
if (!ipv6_addr_any(&ifp->peer_addr)) {
struct rt6_info *rt;
- struct net_device *dev = ifp->idev->dev;
-
- rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
- dev->ifindex, 1);
- if (rt) {
- dst_hold(&rt->dst);
- if (ip6_del_rt(rt))
- dst_free(&rt->dst);
- }
+
+ rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
+ ifp->idev->dev, 0, 0);
+ if (rt && ip6_del_rt(rt))
+ dst_free(&rt->dst);
}
dst_hold(&ifp->rt->dst);
if (ip6_del_rt(ifp->rt))
dst_free(&ifp->rt->dst);
+
+ rt_genid_bump_ipv6(net);
break;
}
atomic_inc(&net->ipv6.dev_addr_genid);
- rt_genid_bump_ipv6(net);
}
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -5168,6 +5201,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "accept_ra_from_local",
+ .data = &ipv6_devconf.accept_ra_from_local,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
/* sentinel */
}
},
@@ -5218,12 +5258,23 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
kfree(t);
}
-static void addrconf_sysctl_register(struct inet6_dev *idev)
+static int addrconf_sysctl_register(struct inet6_dev *idev)
{
- neigh_sysctl_register(idev->dev, idev->nd_parms,
- &ndisc_ifinfo_sysctl_change);
- __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
- idev, &idev->cnf);
+ int err;
+
+ if (!sysctl_dev_name_is_allowed(idev->dev->name))
+ return -EINVAL;
+
+ err = neigh_sysctl_register(idev->dev, idev->nd_parms,
+ &ndisc_ifinfo_sysctl_change);
+ if (err)
+ return err;
+ err = __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
+ idev, &idev->cnf);
+ if (err)
+ neigh_sysctl_unregister(idev->nd_parms);
+
+ return err;
}
static void addrconf_sysctl_unregister(struct inet6_dev *idev)
@@ -5308,6 +5359,7 @@ static struct rtnl_af_ops inet6_ops = {
int __init addrconf_init(void)
{
+ struct inet6_dev *idev;
int i, err;
err = ipv6_addr_label_init();
@@ -5346,11 +5398,12 @@ int __init addrconf_init(void)
* device and it being up should be removed.
*/
rtnl_lock();
- if (!ipv6_add_dev(init_net.loopback_dev))
- err = -ENOMEM;
+ idev = ipv6_add_dev(init_net.loopback_dev);
rtnl_unlock();
- if (err)
+ if (IS_ERR(idev)) {
+ err = PTR_ERR(idev);
goto errlo;
+ }
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index e6960457f625..98cc4cd570e2 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -8,6 +8,13 @@
#include <net/addrconf.h>
#include <net/ip.h>
+/* if ipv6 module registers this function is used by xfrm to force all
+ * sockets to relookup their nodes - this is fairly expensive, be
+ * careful
+ */
+void (*__fib6_flush_trees)(struct net *);
+EXPORT_SYMBOL(__fib6_flush_trees);
+
#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
static inline unsigned int ipv6_addr_scope2type(unsigned int scope)
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 731e1e1722d9..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
last = p;
}
if (last)
- hlist_add_after_rcu(&last->list, &newp->list);
+ hlist_add_behind_rcu(&newp->list, &last->list);
else
hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7cb4392690dd..e8c4400f23e9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,15 +7,15 @@
*
* Adapted from linux/net/ipv4/af_inet.c
*
- * Fixes:
+ * Fixes:
* piggy, Karl Knutson : Socket protocol table
- * Hideaki YOSHIFUJI : sin6_scope_id support
- * Arnaldo Melo : check proc_net_create return, cleanups
+ * Hideaki YOSHIFUJI : sin6_scope_id support
+ * Arnaldo Melo : check proc_net_create return, cleanups
*
* This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -197,7 +197,7 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->ipv6only = net->ipv6.sysctl.bindv6only;
+ sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
* using v6 API for ipv4.
@@ -294,7 +294,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Binding to v4-mapped address on a v6-only socket
* makes no sense
*/
- if (np->ipv6only) {
+ if (sk->sk_ipv6only) {
err = -EINVAL;
goto out;
}
@@ -302,7 +302,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Reproduce AF_INET checks to make the bindings consistent */
v4addr = addr->sin6_addr.s6_addr32[3];
chk_addr_ret = inet_addr_type(net, v4addr);
- if (!sysctl_ip_nonlocal_bind &&
+ if (!net->ipv4.sysctl_ip_nonlocal_bind &&
!(inet->freebind || inet->transparent) &&
v4addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
@@ -371,7 +371,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_type != IPV6_ADDR_ANY) {
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
if (addr_type != IPV6_ADDR_MAPPED)
- np->ipv6only = 1;
+ sk->sk_ipv6only = 1;
}
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
@@ -672,10 +672,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
-bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
+ const struct inet6_skb_parm *opt)
{
const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct inet6_skb_parm *opt = IP6CB(skb);
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
@@ -765,7 +765,8 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1;
- atomic_set(&net->ipv6.rt_genid, 0);
+ net->ipv6.sysctl.auto_flowlabels = 0;
+ atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
if (err)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 72a4930bdc0a..6d16eb0e0c7f 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -17,10 +17,10 @@
* Authors
*
* Mitsuru KANDA @USAGI : IPv6 Support
- * Kazunori MIYAZAWA @USAGI :
- * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * Kazunori MIYAZAWA @USAGI :
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
*
- * This file is derived from net/ipv4/ah.c.
+ * This file is derived from net/ipv4/ah.c.
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -284,7 +284,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
ipv6_rearrange_rthdr(iph, exthdr.rth);
break;
- default :
+ default:
return 0;
}
@@ -478,7 +478,7 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
auth_data = ah_tmp_auth(work_iph, hdr_len);
icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
- err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+ err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
if (err)
goto out;
@@ -622,7 +622,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
goto out_free;
}
- err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+ err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
if (err)
goto out_free;
@@ -647,8 +647,8 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
- struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
- struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
+ struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+ struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+offset);
struct xfrm_state *x;
if (type != ICMPV6_PKT_TOOBIG &&
@@ -713,8 +713,6 @@ static int ah6_init_state(struct xfrm_state *x)
ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
ahp->icv_trunc_len = x->aalg->alg_trunc_len/8;
- BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
-
x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
ahp->icv_trunc_len);
switch (x->props.mode) {
@@ -755,11 +753,10 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err)
return 0;
}
-static const struct xfrm_type ah6_type =
-{
+static const struct xfrm_type ah6_type = {
.description = "AH6",
.owner = THIS_MODULE,
- .proto = IPPROTO_AH,
+ .proto = IPPROTO_AH,
.flags = XFRM_TYPE_REPLAY_PROT,
.init_state = ah6_init_state,
.destructor = ah6_destroy,
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 210183244689..f5e319a8d4e2 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -46,10 +46,6 @@
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
-/* Big ac list lock for all the sockets */
-static DEFINE_SPINLOCK(ipv6_sk_ac_lock);
-
-
/*
* socket join an anycast group
*/
@@ -77,7 +73,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
pac->acl_next = NULL;
pac->acl_addr = *addr;
- rcu_read_lock();
+ rtnl_lock();
if (ifindex == 0) {
struct rt6_info *rt;
@@ -90,11 +86,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
goto error;
} else {
/* router, no matching interface: just pick one */
- dev = dev_get_by_flags_rcu(net, IFF_UP,
- IFF_UP | IFF_LOOPBACK);
+ dev = __dev_get_by_flags(net, IFF_UP,
+ IFF_UP | IFF_LOOPBACK);
}
} else
- dev = dev_get_by_index_rcu(net, ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev == NULL) {
err = -ENODEV;
@@ -126,17 +122,15 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
goto error;
}
- err = ipv6_dev_ac_inc(dev, addr);
+ err = __ipv6_dev_ac_inc(idev, addr);
if (!err) {
- spin_lock_bh(&ipv6_sk_ac_lock);
pac->acl_next = np->ipv6_ac_list;
np->ipv6_ac_list = pac;
- spin_unlock_bh(&ipv6_sk_ac_lock);
pac = NULL;
}
error:
- rcu_read_unlock();
+ rtnl_unlock();
if (pac)
sock_kfree_s(sk, pac, sizeof(*pac));
return err;
@@ -152,7 +146,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct ipv6_ac_socklist *pac, *prev_pac;
struct net *net = sock_net(sk);
- spin_lock_bh(&ipv6_sk_ac_lock);
+ rtnl_lock();
prev_pac = NULL;
for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
@@ -161,7 +155,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
prev_pac = pac;
}
if (!pac) {
- spin_unlock_bh(&ipv6_sk_ac_lock);
+ rtnl_unlock();
return -ENOENT;
}
if (prev_pac)
@@ -169,13 +163,10 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
else
np->ipv6_ac_list = pac->acl_next;
- spin_unlock_bh(&ipv6_sk_ac_lock);
-
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+ dev = __dev_get_by_index(net, pac->acl_ifindex);
if (dev)
ipv6_dev_ac_dec(dev, &pac->acl_addr);
- rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, pac, sizeof(*pac));
return 0;
@@ -192,18 +183,16 @@ void ipv6_sock_ac_close(struct sock *sk)
if (!np->ipv6_ac_list)
return;
- spin_lock_bh(&ipv6_sk_ac_lock);
+ rtnl_lock();
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
- spin_unlock_bh(&ipv6_sk_ac_lock);
prev_index = 0;
- rcu_read_lock();
while (pac) {
struct ipv6_ac_socklist *next = pac->acl_next;
if (pac->acl_ifindex != prev_index) {
- dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+ dev = __dev_get_by_index(net, pac->acl_ifindex);
prev_index = pac->acl_ifindex;
}
if (dev)
@@ -211,7 +200,12 @@ void ipv6_sock_ac_close(struct sock *sk)
sock_kfree_s(sk, pac, sizeof(*pac));
pac = next;
}
- rcu_read_unlock();
+ rtnl_unlock();
+}
+
+static void aca_get(struct ifacaddr6 *aca)
+{
+ atomic_inc(&aca->aca_refcnt);
}
static void aca_put(struct ifacaddr6 *ac)
@@ -223,20 +217,39 @@ static void aca_put(struct ifacaddr6 *ac)
}
}
+static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
+ const struct in6_addr *addr)
+{
+ struct inet6_dev *idev = rt->rt6i_idev;
+ struct ifacaddr6 *aca;
+
+ aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
+ if (aca == NULL)
+ return NULL;
+
+ aca->aca_addr = *addr;
+ in6_dev_hold(idev);
+ aca->aca_idev = idev;
+ aca->aca_rt = rt;
+ aca->aca_users = 1;
+ /* aca_tstamp should be updated upon changes */
+ aca->aca_cstamp = aca->aca_tstamp = jiffies;
+ atomic_set(&aca->aca_refcnt, 1);
+ spin_lock_init(&aca->aca_lock);
+
+ return aca;
+}
+
/*
* device anycast group inc (add if not found)
*/
-int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
+int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca;
- struct inet6_dev *idev;
struct rt6_info *rt;
int err;
- idev = in6_dev_get(dev);
-
- if (idev == NULL)
- return -EINVAL;
+ ASSERT_RTNL();
write_lock_bh(&idev->lock);
if (idev->dead) {
@@ -252,46 +265,35 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
}
}
- /*
- * not found: create a new one.
- */
-
- aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
-
- if (aca == NULL) {
- err = -ENOMEM;
- goto out;
- }
-
rt = addrconf_dst_alloc(idev, addr, true);
if (IS_ERR(rt)) {
- kfree(aca);
err = PTR_ERR(rt);
goto out;
}
-
- aca->aca_addr = *addr;
- aca->aca_idev = idev;
- aca->aca_rt = rt;
- aca->aca_users = 1;
- /* aca_tstamp should be updated upon changes */
- aca->aca_cstamp = aca->aca_tstamp = jiffies;
- atomic_set(&aca->aca_refcnt, 2);
- spin_lock_init(&aca->aca_lock);
+ aca = aca_alloc(rt, addr);
+ if (aca == NULL) {
+ ip6_rt_put(rt);
+ err = -ENOMEM;
+ goto out;
+ }
aca->aca_next = idev->ac_list;
idev->ac_list = aca;
+
+ /* Hold this for addrconf_join_solict() below before we unlock,
+ * it is already exposed via idev->ac_list.
+ */
+ aca_get(aca);
write_unlock_bh(&idev->lock);
ip6_ins_rt(rt);
- addrconf_join_solict(dev, &aca->aca_addr);
+ addrconf_join_solict(idev->dev, &aca->aca_addr);
aca_put(aca);
return 0;
out:
write_unlock_bh(&idev->lock);
- in6_dev_put(idev);
return err;
}
@@ -302,6 +304,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca, *prev_aca;
+ ASSERT_RTNL();
+
write_lock_bh(&idev->lock);
prev_aca = NULL;
for (aca = idev->ac_list; aca; aca = aca->aca_next) {
@@ -331,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
return 0;
}
-/* called with rcu_read_lock() */
+/* called with rtnl_lock() */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
{
struct inet6_dev *idev = __in6_dev_get(dev);
@@ -341,6 +345,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
return __ipv6_dev_ac_dec(idev, addr);
}
+void ipv6_ac_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifacaddr6 *aca;
+
+ write_lock_bh(&idev->lock);
+ while ((aca = idev->ac_list) != NULL) {
+ idev->ac_list = aca->aca_next;
+ write_unlock_bh(&idev->lock);
+
+ addrconf_leave_solict(idev, &aca->aca_addr);
+
+ dst_hold(&aca->aca_rt->dst);
+ ip6_del_rt(aca->aca_rt);
+
+ aca_put(aca);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
/*
* check if the interface has this anycast address
* called with rcu_read_lock()
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c3bf2d2e519e..2cdc38338be3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -43,13 +43,13 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_addr *daddr, *final_p, final;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *daddr, *final_p, final;
struct dst_entry *dst;
struct flowi6 fl6;
struct ip6_flowlabel *flowlabel = NULL;
- struct ipv6_txoptions *opt;
+ struct ipv6_txoptions *opt;
int addr_type;
int err;
@@ -199,6 +199,7 @@ ipv4_connected:
NULL);
sk->sk_state = TCP_ESTABLISHED;
+ ip6_set_txhash(sk);
out:
fl6_sock_release(flowlabel);
return err;
@@ -331,7 +332,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
- struct sk_buff *skb, *skb2;
+ struct sk_buff *skb;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
struct {
struct sock_extended_err ee;
@@ -341,7 +342,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int copied;
err = -EAGAIN;
- skb = skb_dequeue(&sk->sk_error_queue);
+ skb = sock_dequeue_err_skb(sk);
if (skb == NULL)
goto out;
@@ -414,17 +415,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
msg->msg_flags |= MSG_ERRQUEUE;
err = copied;
- /* Reset and regenerate socket error */
- spin_lock_bh(&sk->sk_error_queue.lock);
- sk->sk_err = 0;
- if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
- sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
- spin_unlock_bh(&sk->sk_error_queue.lock);
- sk->sk_error_report(sk);
- } else {
- spin_unlock_bh(&sk->sk_error_queue.lock);
- }
-
out_free_skb:
kfree_skb(skb);
out:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index d15da1377149..83fc3a385a26 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -17,10 +17,10 @@
* Authors
*
* Mitsuru KANDA @USAGI : IPv6 Support
- * Kazunori MIYAZAWA @USAGI :
- * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * Kazunori MIYAZAWA @USAGI :
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
*
- * This file is derived from net/ipv4/esp.c
+ * This file is derived from net/ipv4/esp.c
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -598,7 +598,7 @@ static int esp6_init_state(struct xfrm_state *x)
case XFRM_MODE_BEET:
if (x->sel.family != AF_INET6)
x->props.header_len += IPV4_BEET_PHMAXLEN +
- (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
+ (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
break;
case XFRM_MODE_TRANSPORT:
break;
@@ -621,11 +621,10 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err)
return 0;
}
-static const struct xfrm_type esp6_type =
-{
+static const struct xfrm_type esp6_type = {
.description = "ESP6",
- .owner = THIS_MODULE,
- .proto = IPPROTO_ESP,
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ESP,
.flags = XFRM_TYPE_REPLAY_PROT,
.init_state = esp6_init_state,
.destructor = esp6_destroy,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 8d67900aa003..bfde361b6134 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -142,7 +142,7 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
default: /* Other TLV code so scan list */
if (optlen > len)
goto bad;
- for (curr=procs; curr->type >= 0; curr++) {
+ for (curr = procs; curr->type >= 0; curr++) {
if (curr->type == nh[off]) {
/* type specific length/alignment
checks will be performed in the
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f6c84a6eb238..97ae70077a4f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -170,11 +170,11 @@ static bool is_ineligible(const struct sk_buff *skb)
/*
* Check the ICMP output rate limit
*/
-static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
- struct flowi6 *fl6)
+static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+ struct flowi6 *fl6)
{
- struct dst_entry *dst;
struct net *net = sock_net(sk);
+ struct dst_entry *dst;
bool res = false;
/* Informational messages are not limited. */
@@ -199,16 +199,20 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else {
struct rt6_info *rt = (struct rt6_info *)dst;
int tmo = net->ipv6.sysctl.icmpv6_time;
- struct inet_peer *peer;
/* Give more bandwidth to wider prefixes. */
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- res = inet_peer_xrlim_allow(peer, tmo);
- if (peer)
- inet_putpeer(peer);
+ if (icmp_global_allow()) {
+ struct inet_peer *peer;
+
+ peer = inet_getpeer_v6(net->ipv6.peers,
+ &rt->rt6i_dst.addr, 1);
+ res = inet_peer_xrlim_allow(peer, tmo);
+ if (peer)
+ inet_putpeer(peer);
+ }
}
dst_release(dst);
return res;
@@ -503,7 +507,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
msg.type = type;
len = skb->len - msg.offset;
- len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
+ len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
if (len < 0) {
LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
goto out_dst_release;
@@ -626,24 +630,25 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
int inner_offset;
__be16 frag_off;
u8 nexthdr;
+ struct net *net = dev_net(skb->dev);
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- return;
+ goto out;
nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
if (ipv6_ext_hdr(nexthdr)) {
/* now skip over extension headers */
inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
&nexthdr, &frag_off);
- if (inner_offset<0)
- return;
+ if (inner_offset < 0)
+ goto out;
} else {
inner_offset = sizeof(struct ipv6hdr);
}
/* Checkin header including 8 bytes of inner protocol header. */
if (!pskb_may_pull(skb, inner_offset+8))
- return;
+ goto out;
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
Without this we will not able f.e. to make source routed
@@ -652,13 +657,15 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
--ANK (980726)
*/
- rcu_read_lock();
ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
- rcu_read_unlock();
raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
+ return;
+
+out:
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
/*
@@ -770,12 +777,12 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
default:
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
-
/* informational */
if (type & ICMPV6_INFOMSG_MASK)
break;
+ LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
+
/*
* error of unknown type.
* must pass to upper level
@@ -805,7 +812,7 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
memset(fl6, 0, sizeof(*fl6));
fl6->saddr = *saddr;
fl6->daddr = *daddr;
- fl6->flowi6_proto = IPPROTO_ICMPV6;
+ fl6->flowi6_proto = IPPROTO_ICMPV6;
fl6->fl6_icmp_type = type;
fl6->fl6_icmp_code = 0;
fl6->flowi6_oif = oif;
@@ -872,8 +879,8 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
}
static struct pernet_operations icmpv6_sk_ops = {
- .init = icmpv6_sk_init,
- .exit = icmpv6_sk_exit,
+ .init = icmpv6_sk_init,
+ .exit = icmpv6_sk_exit,
};
int __init icmpv6_init(void)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index a245e5ddffbd..29b32206e494 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -63,7 +63,6 @@ int inet6_csk_bind_conflict(const struct sock *sk,
return sk2 != NULL;
}
-
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
struct dst_entry *inet6_csk_route_req(struct sock *sk,
@@ -144,7 +143,6 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
return NULL;
}
-
EXPORT_SYMBOL_GPL(inet6_csk_search_req);
void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
@@ -160,10 +158,9 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
inet_csk_reqsk_queue_added(sk, timeout);
}
-
EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
-void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
@@ -175,7 +172,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
sk->sk_bound_dev_if);
}
-
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 262e13c02ec2..051dffb49c90 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -6,7 +6,7 @@
* Generic INET6 transport hashtables
*
* Authors: Lotsa people, from code originally in tcp, generalised here
- * by Arnaldo Carvalho de Melo <acme@mandriva.com>
+ * by Arnaldo Carvalho de Melo <acme@mandriva.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -198,7 +198,7 @@ begin:
}
} else if (score == hiscore && reuseport) {
matches++;
- if (((u64)phash * matches) >> 32 == 0)
+ if (reciprocal_scale(phash, matches) == 0)
result = sk;
phash = next_pseudo_random32(phash);
}
@@ -222,7 +222,6 @@ begin:
rcu_read_unlock();
return result;
}
-
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
@@ -238,7 +237,6 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
return sk;
}
-
EXPORT_SYMBOL_GPL(inet6_lookup);
static int __inet6_check_established(struct inet_timewait_death_row *death_row,
@@ -324,5 +322,4 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
__inet6_check_established, __inet6_hash);
}
-
EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cb4459bd1d29..b2d1838897c9 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -46,20 +46,11 @@
static struct kmem_cache *fib6_node_kmem __read_mostly;
-enum fib_walk_state_t {
-#ifdef CONFIG_IPV6_SUBTREES
- FWS_S,
-#endif
- FWS_L,
- FWS_R,
- FWS_C,
- FWS_U
-};
-
-struct fib6_cleaner_t {
- struct fib6_walker_t w;
+struct fib6_cleaner {
+ struct fib6_walker w;
struct net *net;
int (*func)(struct rt6_info *, void *arg);
+ int sernum;
void *arg;
};
@@ -74,8 +65,8 @@ static DEFINE_RWLOCK(fib6_walker_lock);
static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
-static int fib6_walk(struct fib6_walker_t *w);
-static int fib6_walk_continue(struct fib6_walker_t *w);
+static int fib6_walk(struct fib6_walker *w);
+static int fib6_walk_continue(struct fib6_walker *w);
/*
* A routing update causes an increase of the serial number on the
@@ -84,34 +75,41 @@ static int fib6_walk_continue(struct fib6_walker_t *w);
* result of redirects, path MTU changes, etc.
*/
-static __u32 rt_sernum;
-
static void fib6_gc_timer_cb(unsigned long arg);
static LIST_HEAD(fib6_walkers);
#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh)
-static inline void fib6_walker_link(struct fib6_walker_t *w)
+static void fib6_walker_link(struct fib6_walker *w)
{
write_lock_bh(&fib6_walker_lock);
list_add(&w->lh, &fib6_walkers);
write_unlock_bh(&fib6_walker_lock);
}
-static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+static void fib6_walker_unlink(struct fib6_walker *w)
{
write_lock_bh(&fib6_walker_lock);
list_del(&w->lh);
write_unlock_bh(&fib6_walker_lock);
}
-static __inline__ u32 fib6_new_sernum(void)
+
+static int fib6_new_sernum(struct net *net)
{
- u32 n = ++rt_sernum;
- if ((__s32)n <= 0)
- rt_sernum = n = 1;
- return n;
+ int new, old;
+
+ do {
+ old = atomic_read(&net->ipv6.fib6_sernum);
+ new = old < INT_MAX ? old + 1 : 1;
+ } while (atomic_cmpxchg(&net->ipv6.fib6_sernum,
+ old, new) != old);
+ return new;
}
+enum {
+ FIB6_NO_SERNUM_CHANGE = 0,
+};
+
/*
* Auxiliary address test functions for the radix tree.
*
@@ -128,7 +126,7 @@ static __inline__ u32 fib6_new_sernum(void)
# define BITOP_BE32_SWIZZLE 0
#endif
-static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
+static __be32 addr_bit_set(const void *token, int fn_bit)
{
const __be32 *addr = token;
/*
@@ -142,7 +140,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static __inline__ struct fib6_node *node_alloc(void)
+static struct fib6_node *node_alloc(void)
{
struct fib6_node *fn;
@@ -151,12 +149,12 @@ static __inline__ struct fib6_node *node_alloc(void)
return fn;
}
-static __inline__ void node_free(struct fib6_node *fn)
+static void node_free(struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
}
-static __inline__ void rt6_release(struct rt6_info *rt)
+static void rt6_release(struct rt6_info *rt)
{
if (atomic_dec_and_test(&rt->rt6i_ref))
dst_free(&rt->dst);
@@ -267,7 +265,7 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
-static int fib6_dump_node(struct fib6_walker_t *w)
+static int fib6_dump_node(struct fib6_walker *w)
{
int res;
struct rt6_info *rt;
@@ -287,7 +285,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
static void fib6_dump_end(struct netlink_callback *cb)
{
- struct fib6_walker_t *w = (void *)cb->args[2];
+ struct fib6_walker *w = (void *)cb->args[2];
if (w) {
if (cb->args[4]) {
@@ -310,7 +308,7 @@ static int fib6_dump_done(struct netlink_callback *cb)
static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct fib6_walker_t *w;
+ struct fib6_walker *w;
int res;
w = (void *)cb->args[2];
@@ -355,7 +353,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct rt6_rtnl_dump_arg arg;
- struct fib6_walker_t *w;
+ struct fib6_walker *w;
struct fib6_table *tb;
struct hlist_head *head;
int res = 0;
@@ -423,14 +421,13 @@ out:
static struct fib6_node *fib6_add_1(struct fib6_node *root,
struct in6_addr *addr, int plen,
int offset, int allow_create,
- int replace_required)
+ int replace_required, int sernum)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
struct rt6key *key;
int bit;
__be32 dir = 0;
- __u32 sernum = fib6_new_sernum();
RT6_TRACE("fib6_add_1\n");
@@ -627,7 +624,7 @@ insert_above:
return ln;
}
-static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
+static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
{
return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
RTF_GATEWAY;
@@ -643,7 +640,7 @@ static int fib6_commit_metrics(struct dst_entry *dst,
if (dst->flags & DST_HOST) {
mp = dst_metrics_write_ptr(dst);
} else {
- mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
if (!mp)
return -ENOMEM;
dst_init_metrics(dst, mp, 0);
@@ -820,7 +817,7 @@ add:
return 0;
}
-static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
+static void fib6_start_gc(struct net *net, struct rt6_info *rt)
{
if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
(rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
@@ -848,6 +845,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
+ int sernum = fib6_new_sernum(info->nl_net);
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -860,7 +858,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
- replace_required);
+ replace_required, sernum);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
fn = NULL;
@@ -894,14 +892,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
sfn->fn_flags = RTN_ROOT;
- sfn->fn_sernum = fib6_new_sernum();
+ sfn->fn_sernum = sernum;
/* Now add the first leaf node to new subtree */
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required);
+ allow_create, replace_required, sernum);
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
@@ -920,7 +918,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required);
+ allow_create, replace_required, sernum);
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
@@ -1174,7 +1172,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
int children;
int nstate;
struct fib6_node *child, *pn;
- struct fib6_walker_t *w;
+ struct fib6_walker *w;
int iter = 0;
for (;;) {
@@ -1276,7 +1274,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
struct nl_info *info)
{
- struct fib6_walker_t *w;
+ struct fib6_walker *w;
struct rt6_info *rt = *rtp;
struct net *net = info->nl_net;
@@ -1414,7 +1412,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
* <0 -> walk is terminated by an error.
*/
-static int fib6_walk_continue(struct fib6_walker_t *w)
+static int fib6_walk_continue(struct fib6_walker *w)
{
struct fib6_node *fn, *pn;
@@ -1498,7 +1496,7 @@ skip:
}
}
-static int fib6_walk(struct fib6_walker_t *w)
+static int fib6_walk(struct fib6_walker *w)
{
int res;
@@ -1512,15 +1510,25 @@ static int fib6_walk(struct fib6_walker_t *w)
return res;
}
-static int fib6_clean_node(struct fib6_walker_t *w)
+static int fib6_clean_node(struct fib6_walker *w)
{
int res;
struct rt6_info *rt;
- struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
+ struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
struct nl_info info = {
.nl_net = c->net,
};
+ if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
+ w->node->fn_sernum != c->sernum)
+ w->node->fn_sernum = c->sernum;
+
+ if (!c->func) {
+ WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
+ w->leaf = NULL;
+ return 0;
+ }
+
for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
res = c->func(rt, c->arg);
if (res < 0) {
@@ -1554,9 +1562,9 @@ static int fib6_clean_node(struct fib6_walker_t *w)
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg),
- int prune, void *arg)
+ bool prune, int sernum, void *arg)
{
- struct fib6_cleaner_t c;
+ struct fib6_cleaner c;
c.w.root = root;
c.w.func = fib6_clean_node;
@@ -1564,14 +1572,16 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
c.w.count = 0;
c.w.skip = 0;
c.func = func;
+ c.sernum = sernum;
c.arg = arg;
c.net = net;
fib6_walk(&c.w);
}
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
- void *arg)
+static void __fib6_clean_all(struct net *net,
+ int (*func)(struct rt6_info *, void *),
+ int sernum, void *arg)
{
struct fib6_table *table;
struct hlist_head *head;
@@ -1583,13 +1593,19 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
write_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, 0, arg);
+ func, false, sernum, arg);
write_unlock_bh(&table->tb6_lock);
}
}
rcu_read_unlock();
}
+void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
+ void *arg)
+{
+ __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
+}
+
static int fib6_prune_clone(struct rt6_info *rt, void *arg)
{
if (rt->rt6i_flags & RTF_CACHE) {
@@ -1602,7 +1618,15 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)
static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
{
- fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
+ fib6_clean_tree(net, fn, fib6_prune_clone, true,
+ FIB6_NO_SERNUM_CHANGE, NULL);
+}
+
+static void fib6_flush_trees(struct net *net)
+{
+ int new_sernum = fib6_new_sernum(net);
+
+ __fib6_clean_all(net, NULL, new_sernum, NULL);
}
/*
@@ -1788,6 +1812,8 @@ int __init fib6_init(void)
NULL);
if (ret)
goto out_unregister_subsys;
+
+ __fib6_flush_trees = fib6_flush_trees;
out:
return ret;
@@ -1808,10 +1834,10 @@ void fib6_gc_cleanup(void)
struct ipv6_route_iter {
struct seq_net_private p;
- struct fib6_walker_t w;
+ struct fib6_walker w;
loff_t skip;
struct fib6_table *tbl;
- __u32 sernum;
+ int sernum;
};
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
@@ -1839,7 +1865,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static int ipv6_route_yield(struct fib6_walker_t *w)
+static int ipv6_route_yield(struct fib6_walker *w)
{
struct ipv6_route_iter *iter = w->args;
@@ -1960,7 +1986,7 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
{
- struct fib6_walker_t *w = &iter->w;
+ struct fib6_walker *w = &iter->w;
return w->node && !(w->state == FWS_U && w->node == w->root);
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 4052694c6f2c..3dd7d4ebd7cd 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -136,7 +136,7 @@ static void ip6_fl_gc(unsigned long dummy)
spin_lock(&ip6_fl_lock);
- for (i=0; i<=FL_HASH_MASK; i++) {
+ for (i = 0; i <= FL_HASH_MASK; i++) {
struct ip6_flowlabel *fl;
struct ip6_flowlabel __rcu **flp;
@@ -239,7 +239,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
/* Socket flowlabel lists */
-struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
+struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
{
struct ipv6_fl_socklist *sfl;
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -259,7 +259,6 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
rcu_read_unlock_bh();
return NULL;
}
-
EXPORT_SYMBOL_GPL(fl6_sock_lookup);
void fl6_free_socklist(struct sock *sk)
@@ -293,11 +292,11 @@ void fl6_free_socklist(struct sock *sk)
following rthdr.
*/
-struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
- struct ip6_flowlabel * fl,
- struct ipv6_txoptions * fopt)
+struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
+ struct ip6_flowlabel *fl,
+ struct ipv6_txoptions *fopt)
{
- struct ipv6_txoptions * fl_opt = fl->opt;
+ struct ipv6_txoptions *fl_opt = fl->opt;
if (fopt == NULL || fopt->opt_flen == 0)
return fl_opt;
@@ -388,7 +387,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
goto done;
msg.msg_controllen = olen;
- msg.msg_control = (void*)(fl->opt+1);
+ msg.msg_control = (void *)(fl->opt+1);
memset(&flowi6, 0, sizeof(flowi6));
err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
@@ -517,7 +516,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
struct net *net = sock_net(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_flowlabel_req freq;
- struct ipv6_fl_socklist *sfl1=NULL;
+ struct ipv6_fl_socklist *sfl1 = NULL;
struct ipv6_fl_socklist *sfl;
struct ipv6_fl_socklist __rcu **sflp;
struct ip6_flowlabel *fl, *fl1 = NULL;
@@ -542,7 +541,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
}
spin_lock_bh(&ip6_sk_fl_lock);
for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference(*sflp))!=NULL;
+ (sfl = rcu_dereference(*sflp)) != NULL;
sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3873181ed856..12c3c8ef3849 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -314,6 +314,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+ if (t && create)
+ return NULL;
if (t || !create)
return t;
@@ -322,7 +324,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
else
strcpy(name, "ip6gre%d");
- dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ip6gre_tunnel_setup);
if (!dev)
return NULL;
@@ -615,6 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
int err = -1;
u8 proto;
struct sk_buff *new_skb;
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -723,15 +727,17 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
* Push down and install the IP header.
*/
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
- ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : skb->protocol;
+ protocol = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : skb->protocol;
+ ((__be16 *)(ipv6h + 1))[1] = protocol;
if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
__be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
@@ -752,6 +758,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
}
+ skb_set_inner_protocol(skb, protocol);
+
ip6tunnel_xmit(skb, dev);
if (ndst)
ip6_tnl_dst_store(tunnel, ndst);
@@ -778,7 +786,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
+ fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv4_get_dsfield(iph);
@@ -828,7 +836,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
+ fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -1174,7 +1182,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
__be16 *p = (__be16 *)(ipv6h+1);
- ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
+ ip6_flow_hdr(ipv6h, 0,
+ ip6_make_flowlabel(dev_net(dev), skb,
+ t->fl.u.ip6.flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
@@ -1232,7 +1242,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->iflink = 0;
dev->addr_len = sizeof(struct in6_addr);
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
}
static int ip6gre_tunnel_init(struct net_device *dev)
@@ -1323,7 +1333,8 @@ static int __net_init ip6gre_init_net(struct net *net)
int err;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
- ip6gre_tunnel_setup);
+ NET_NAME_UNKNOWN,
+ ip6gre_tunnel_setup);
if (!ign->fb_tunnel_dev) {
err = -ENOMEM;
goto err_alloc_dev;
@@ -1719,4 +1730,5 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_RTNL_LINK("ip6gretap");
MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 4578e23834f7..14dacc544c3e 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -13,7 +13,7 @@ static ip6_icmp_send_t __rcu *ip6_icmp_send;
int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
{
return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ?
- 0 : -EBUSY;
+ 0 : -EBUSY;
}
EXPORT_SYMBOL(inet6_register_icmp_sender);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 51d54dc376f3..a3084ab5df6c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -15,8 +15,8 @@
*/
/* Changes
*
- * Mitsuru KANDA @USAGI and
- * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
+ * Mitsuru KANDA @USAGI and
+ * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
*/
#include <linux/errno.h>
@@ -65,7 +65,7 @@ int ip6_rcv_finish(struct sk_buff *skb)
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
const struct ipv6hdr *hdr;
- u32 pkt_len;
+ u32 pkt_len;
struct inet6_dev *idev;
struct net *net = dev_net(skb->dev);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 65eda2a8af48..9034f76ae013 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -53,31 +53,6 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
return proto;
}
-static int ipv6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- const struct net_offload *ops;
- int err = -EINVAL;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- err = -EPROTONOSUPPORT;
-
- ops = rcu_dereference(inet6_offloads[
- ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
-
- if (likely(ops && ops->callbacks.gso_send_check)) {
- skb_reset_transport_header(skb);
- err = ops->callbacks.gso_send_check(skb);
- }
-
-out:
- return err;
-}
-
static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -244,7 +219,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
continue;
iph2 = (struct ipv6hdr *)(p->data + off);
- first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
+ first_word = *(__be32 *)iph ^ *(__be32 *)iph2;
/* All fields must match except length and Traffic Class.
* XXX skbs on the gro_list have all been parsed and pulled
@@ -261,6 +236,9 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
/* flush if Traffic Class fields are different */
NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
NAPI_GRO_CB(p)->flush |= flush;
+
+ /* Clear flush_id, there's really no concept of ID in IPv6. */
+ NAPI_GRO_CB(p)->flush_id = 0;
}
NAPI_GRO_CB(skb)->flush |= flush;
@@ -303,7 +281,6 @@ out_unlock:
static struct packet_offload ipv6_packet_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
.callbacks = {
- .gso_send_check = ipv6_gso_send_check,
.gso_segment = ipv6_gso_segment,
.gro_receive = ipv6_gro_receive,
.gro_complete = ipv6_gro_complete,
@@ -312,8 +289,9 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
static const struct net_offload sit_offload = {
.callbacks = {
- .gso_send_check = ipv6_gso_send_check,
.gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
},
};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 45702b8cd141..8e950c250ada 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -20,7 +20,7 @@
* etc.
*
* H. von Brand : Added missing #include <linux/string.h>
- * Imran Patel : frag id should be in NBO
+ * Imran Patel : frag id should be in NBO
* Kazunori MIYAZAWA @USAGI
* : add ip6_append_data and related functions
* for datagram xmit
@@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
- ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -232,7 +233,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EMSGSIZE;
}
-
EXPORT_SYMBOL(ip6_xmit);
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
@@ -554,14 +554,14 @@ static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
- struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
+ struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
int hroom, troom;
__be32 frag_id = 0;
- int ptr, offset = 0, err=0;
+ int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -636,7 +636,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
}
__skb_pull(skb, hlen);
- fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
+ fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
__skb_push(skb, hlen);
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen);
@@ -661,7 +661,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
if (frag) {
frag->ip_summed = CHECKSUM_NONE;
skb_reset_transport_header(frag);
- fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
+ fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
__skb_push(frag, hlen);
skb_reset_network_header(frag);
memcpy(skb_network_header(frag), tmp_hdr,
@@ -680,7 +680,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
}
err = output(skb);
- if(!err)
+ if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
@@ -701,11 +701,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
return 0;
}
- while (frag) {
- skb = frag->next;
- kfree_skb(frag);
- frag = skb;
- }
+ kfree_skb_list(frag);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
@@ -741,7 +737,7 @@ slow_path:
/*
* Keep copying data until we run out.
*/
- while(left > 0) {
+ while (left > 0) {
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
@@ -802,8 +798,8 @@ slow_path:
/*
* Copy a block of the IP datagram.
*/
- if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
+ len));
left -= len;
fh->frag_off = htons(offset);
@@ -864,7 +860,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
- * and MSG_DONTROUTE --ANK (980726)
+ * and MSG_DONTROUTE --ANK (980726)
*
* 1. ip6_rt_check(): If route was host route,
* check that cached destination is current.
@@ -1008,7 +1004,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
@@ -1040,7 +1036,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
@@ -1048,7 +1044,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen,
- int transhdrlen, int mtu,unsigned int flags,
+ int transhdrlen, int mtu, unsigned int flags,
struct rt6_info *rt)
{
@@ -1071,7 +1067,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_reserve(skb, hh_len);
/* create space for UDP/IP header */
- skb_put(skb,fragheaderlen + transhdrlen);
+ skb_put(skb, fragheaderlen + transhdrlen);
/* initialize network header pointer */
skb_reset_network_header(skb);
@@ -1156,6 +1152,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
int err;
int offset = 0;
__u8 tx_flags = 0;
+ u32 tskey = 0;
if (flags&MSG_PROBE)
return 0;
@@ -1271,9 +1268,12 @@ emsgsize:
}
}
- /* For UDP, check if TX timestamp is enabled */
- if (sk->sk_type == SOCK_DGRAM)
+ if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
sock_tx_timestamp(sk, &tx_flags);
+ if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ tskey = sk->sk_tskey++;
+ }
/*
* Let's try using as much space as possible.
@@ -1381,12 +1381,6 @@ alloc_new_skb:
sk->sk_allocation);
if (unlikely(skb == NULL))
err = -ENOBUFS;
- else {
- /* Only the initial fragment
- * is time stamped.
- */
- tx_flags = 0;
- }
}
if (skb == NULL)
goto error;
@@ -1400,8 +1394,11 @@ alloc_new_skb:
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
- if (sk->sk_type == SOCK_DGRAM)
- skb_shinfo(skb)->tx_flags = tx_flags;
+ /* Only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = tx_flags;
+ tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
/*
* Find where to start putting bytes
@@ -1571,7 +1568,9 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, np->cork.tclass,
+ ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index afa082458360..9409887fb664 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -315,7 +315,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
else
sprintf(name, "ip6tnl%%d");
- dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ip6_tnl_dev_setup);
if (dev == NULL)
goto failed;
@@ -363,8 +364,12 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr))
+ ipv6_addr_equal(remote, &t->parms.raddr)) {
+ if (create)
+ return NULL;
+
return t;
+ }
}
if (!create)
return NULL;
@@ -407,12 +412,12 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
__u8 nexthdr = ipv6h->nexthdr;
- __u16 off = sizeof (*ipv6h);
+ __u16 off = sizeof(*ipv6h);
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
__u16 optlen = 0;
struct ipv6_opt_hdr *hdr;
- if (raw + off + sizeof (*hdr) > skb->data &&
+ if (raw + off + sizeof(*hdr) > skb->data &&
!pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
break;
@@ -529,7 +534,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
- if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
+ if ((len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
rel_type = ICMPV6_PKT_TOOBIG;
rel_code = 0;
rel_info = mtu;
@@ -990,7 +995,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
t->parms.name);
goto tx_err_dst_release;
}
- mtu = dst_mtu(dst) - sizeof (*ipv6h);
+ mtu = dst_mtu(dst) - sizeof(*ipv6h);
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
@@ -1046,7 +1051,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -1081,7 +1087,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPIP;
dsfield = ipv4_get_dsfield(iph);
@@ -1133,7 +1139,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPV6;
dsfield = ipv6_get_dsfield(ipv6h);
@@ -1227,11 +1233,11 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
if (rt->dst.dev) {
dev->hard_header_len = rt->dst.dev->hard_header_len +
- sizeof (struct ipv6hdr);
+ sizeof(struct ipv6hdr);
- dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
+ dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr);
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu-=8;
+ dev->mtu -= 8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1348,7 +1354,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -1360,7 +1366,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
memset(&p, 0, sizeof(p));
}
ip6_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
err = -EFAULT;
}
break;
@@ -1370,7 +1376,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
break;
err = -EINVAL;
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
@@ -1405,7 +1411,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
break;
err = -ENOENT;
ip6_tnl_parm_from_user(&p1, &p);
@@ -1480,14 +1486,14 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->destructor = ip6_dev_free;
dev->type = ARPHRD_TUNNEL6;
- dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
- dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
+ dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr);
t = netdev_priv(dev);
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu-=8;
+ dev->mtu -= 8;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
/* This perm addr will be used as interface identifier by IPv6 */
dev->addr_assign_type = NET_ADDR_RANDOM;
eth_random_addr(dev->perm_addr);
@@ -1772,7 +1778,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
- ip6_tnl_dev_setup);
+ NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
new file mode 100644
index 000000000000..b04ed72c4542
--- /dev/null
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -0,0 +1,107 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in6.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp)
+{
+ struct sockaddr_in6 udp6_addr;
+ int err;
+ struct socket *sock = NULL;
+
+ err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+ if (err < 0)
+ goto error;
+
+ sk_change_net(sock->sk, net);
+
+ udp6_addr.sin6_family = AF_INET6;
+ memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
+ sizeof(udp6_addr.sin6_addr));
+ udp6_addr.sin6_port = cfg->local_udp_port;
+ err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
+ sizeof(udp6_addr));
+ if (err < 0)
+ goto error;
+
+ if (cfg->peer_udp_port) {
+ udp6_addr.sin6_family = AF_INET6;
+ memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
+ sizeof(udp6_addr.sin6_addr));
+ udp6_addr.sin6_port = cfg->peer_udp_port;
+ err = kernel_connect(sock,
+ (struct sockaddr *)&udp6_addr,
+ sizeof(udp6_addr), 0);
+ }
+ if (err < 0)
+ goto error;
+
+ udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
+ udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
+
+ *sockp = sock;
+ return 0;
+
+error:
+ if (sock) {
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
+ }
+ *sockp = NULL;
+ return err;
+}
+EXPORT_SYMBOL_GPL(udp_sock_create6);
+
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+ struct sk_buff *skb, struct net_device *dev,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
+{
+ struct udphdr *uh;
+ struct ipv6hdr *ip6h;
+ struct sock *sk = sock->sk;
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+
+ uh->len = htons(skb->len);
+ uh->check = 0;
+
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+ | IPSKB_REROUTED);
+ skb_dst_set(skb, dst);
+
+ udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr,
+ &sk->sk_v6_daddr, skb->len);
+
+ __skb_push(skb, sizeof(*ip6h));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6_flow_hdr(ip6h, prio, htonl(0));
+ ip6h->payload_len = htons(skb->len);
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+
+ ip6tunnel_xmit(skb, dev);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 9aaa6bb229e4..d440bb585524 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -204,7 +204,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
else
sprintf(name, "ip6_vti%%d");
- dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
if (dev == NULL)
goto failed;
@@ -253,8 +253,12 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr))
+ ipv6_addr_equal(remote, &t->parms.raddr)) {
+ if (create)
+ return NULL;
+
return t;
+ }
}
if (!create)
return NULL;
@@ -803,7 +807,7 @@ static void vti6_dev_setup(struct net_device *dev)
dev->mtu = ETH_DATA_LEN;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
}
/**
@@ -1020,7 +1024,7 @@ static int __net_init vti6_init_net(struct net *net)
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
- vti6_dev_setup);
+ NET_NAME_UNKNOWN, vti6_dev_setup);
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
@@ -1089,36 +1093,26 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
**/
static int __init vti6_tunnel_init(void)
{
- int err;
+ const char *msg;
+ int err;
+ msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops);
if (err < 0)
- goto out_pernet;
+ goto pernet_dev_failed;
+ msg = "tunnel protocols";
err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
- if (err < 0) {
- pr_err("%s: can't register vti6 protocol\n", __func__);
-
- goto out;
- }
-
+ if (err < 0)
+ goto xfrm_proto_esp_failed;
err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
- if (err < 0) {
- xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
- pr_err("%s: can't register vti6 protocol\n", __func__);
-
- goto out;
- }
-
+ if (err < 0)
+ goto xfrm_proto_ah_failed;
err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
- if (err < 0) {
- xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
- xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
- pr_err("%s: can't register vti6 protocol\n", __func__);
-
- goto out;
- }
+ if (err < 0)
+ goto xfrm_proto_comp_failed;
+ msg = "netlink interface";
err = rtnl_link_register(&vti6_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1127,11 +1121,14 @@ static int __init vti6_tunnel_init(void)
rtnl_link_failed:
xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+xfrm_proto_comp_failed:
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
-out:
+xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops);
-out_pernet:
+pernet_dev_failed:
+ pr_err("vti6 init: failed to register %s\n", msg);
return err;
}
@@ -1141,13 +1138,9 @@ out_pernet:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
- if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
- pr_info("%s: can't deregister protocol\n", __func__);
- if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
- pr_info("%s: can't deregister protocol\n", __func__);
- if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
- pr_info("%s: can't deregister protocol\n", __func__);
-
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops);
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8250474ab7dc..0171f08325c3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -744,7 +744,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
else
sprintf(name, "pim6reg%u", mrt->id);
- dev = alloc_netdev(0, name, reg_vif_setup);
+ dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
if (dev == NULL)
return NULL;
@@ -845,7 +845,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1103,7 +1103,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index d1c793cffcb5..1b9316e1386a 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -181,8 +181,7 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
return 0;
}
-static const struct xfrm_type ipcomp6_type =
-{
+static const struct xfrm_type ipcomp6_type = {
.description = "IPCOMP6",
.owner = THIS_MODULE,
.proto = IPPROTO_COMP,
@@ -193,8 +192,7 @@ static const struct xfrm_type ipcomp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static struct xfrm6_protocol ipcomp6_protocol =
-{
+static struct xfrm6_protocol ipcomp6_protocol = {
.handler = xfrm6_rcv,
.cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index edb58aff4ae7..e1a9583bb419 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -66,12 +66,12 @@ int ip6_ra_control(struct sock *sk, int sel)
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW)
return -ENOPROTOOPT;
- new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+ new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
write_lock_bh(&ip6_ra_lock);
- for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+ for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
if (ra->sk == sk) {
- if (sel>=0) {
+ if (sel >= 0) {
write_unlock_bh(&ip6_ra_lock);
kfree(new_ra);
return -EADDRINUSE;
@@ -130,7 +130,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
int retv = -ENOPROTOOPT;
if (optval == NULL)
- val=0;
+ val = 0;
else {
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
@@ -139,7 +139,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
val = 0;
}
- valbool = (val!=0);
+ valbool = (val != 0);
if (ip6_mroute_opt(optname))
return ip6_mroute_setsockopt(sk, optname, optval, optlen);
@@ -235,7 +235,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int) ||
inet_sk(sk)->inet_num)
goto e_inval;
- np->ipv6only = valbool;
+ sk->sk_ipv6only = valbool;
retv = 0;
break;
@@ -474,7 +474,7 @@ sticky_done:
goto done;
msg.msg_controllen = optlen;
- msg.msg_control = (void*)(opt+1);
+ msg.msg_control = (void *)(opt+1);
retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
&junk, &junk);
@@ -687,7 +687,7 @@ done:
retv = -ENOBUFS;
break;
}
- gsf = kmalloc(optlen,GFP_KERNEL);
+ gsf = kmalloc(optlen, GFP_KERNEL);
if (!gsf) {
retv = -ENOBUFS;
break;
@@ -834,6 +834,10 @@ pref_skip_coa:
np->dontfrag = valbool;
retv = 0;
break;
+ case IPV6_AUTOFLOWLABEL:
+ np->autoflowlabel = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -869,7 +873,6 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(ipv6_setsockopt);
#ifdef CONFIG_COMPAT
@@ -905,7 +908,6 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(compat_ipv6_setsockopt);
#endif
@@ -917,7 +919,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
if (!opt)
return 0;
- switch(optname) {
+ switch (optname) {
case IPV6_HOPOPTS:
hdr = opt->hopopt;
break;
@@ -1058,7 +1060,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_V6ONLY:
- val = np->ipv6only;
+ val = sk->sk_ipv6only;
break;
case IPV6_RECVPKTINFO:
@@ -1158,7 +1160,6 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
- break;
}
case IPV6_TRANSPARENT:
@@ -1273,13 +1274,17 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->dontfrag;
break;
+ case IPV6_AUTOFLOWLABEL:
+ val = np->autoflowlabel;
+ break;
+
default:
return -ENOPROTOOPT;
}
len = min_t(unsigned int, sizeof(int), len);
- if(put_user(len, optlen))
+ if (put_user(len, optlen))
return -EFAULT;
- if(copy_to_user(optval,&val,len))
+ if (copy_to_user(optval, &val, len))
return -EFAULT;
return 0;
}
@@ -1292,7 +1297,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
return udp_prot.getsockopt(sk, level, optname, optval, optlen);
- if(level != SOL_IPV6)
+ if (level != SOL_IPV6)
return -ENOPROTOOPT;
err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
@@ -1314,7 +1319,6 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(ipv6_getsockopt);
#ifdef CONFIG_COMPAT
@@ -1357,7 +1361,6 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 617f0958e164..9648de2b6745 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -64,15 +64,6 @@
#include <net/ip6_checksum.h>
-/* Set to 3 to get tracing... */
-#define MCAST_DEBUG 2
-
-#if MCAST_DEBUG >= 3
-#define MDBG(x) printk x
-#else
-#define MDBG(x)
-#endif
-
/* Ensure that we have struct in6_addr aligned on 32bit word. */
static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4),
@@ -82,9 +73,6 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
-/* Big mc list lock for all the sockets */
-static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
-
static void igmp6_join_group(struct ifmcaddr6 *ma);
static void igmp6_leave_group(struct ifmcaddr6 *ma);
static void igmp6_timer_handler(unsigned long data);
@@ -121,6 +109,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
#define IPV6_MLD_MAX_MSF 64
int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
+int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
/*
* socket join on multicast group
@@ -172,7 +161,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
mc_lst->next = NULL;
mc_lst->addr = *addr;
- rcu_read_lock();
+ rtnl_lock();
if (ifindex == 0) {
struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0);
@@ -181,10 +170,10 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
ip6_rt_put(rt);
}
} else
- dev = dev_get_by_index_rcu(net, ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev == NULL) {
- rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return -ENODEV;
}
@@ -201,17 +190,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
err = ipv6_dev_mc_inc(dev, addr);
if (err) {
- rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return err;
}
- spin_lock(&ipv6_sk_mc_lock);
mc_lst->next = np->ipv6_mc_list;
rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
- spin_unlock(&ipv6_sk_mc_lock);
- rcu_read_unlock();
+ rtnl_unlock();
return 0;
}
@@ -229,20 +216,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
- spin_lock(&ipv6_sk_mc_lock);
+ rtnl_lock();
for (lnk = &np->ipv6_mc_list;
- (mc_lst = rcu_dereference_protected(*lnk,
- lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ;
+ (mc_lst = rtnl_dereference(*lnk)) != NULL;
lnk = &mc_lst->next) {
if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
ipv6_addr_equal(&mc_lst->addr, addr)) {
struct net_device *dev;
*lnk = mc_lst->next;
- spin_unlock(&ipv6_sk_mc_lock);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
+ dev = __dev_get_by_index(net, mc_lst->ifindex);
if (dev != NULL) {
struct inet6_dev *idev = __in6_dev_get(dev);
@@ -251,13 +235,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
- rcu_read_unlock();
+ rtnl_unlock();
+
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
return 0;
}
}
- spin_unlock(&ipv6_sk_mc_lock);
+ rtnl_unlock();
return -EADDRNOTAVAIL;
}
@@ -302,16 +287,13 @@ void ipv6_sock_mc_close(struct sock *sk)
if (!rcu_access_pointer(np->ipv6_mc_list))
return;
- spin_lock(&ipv6_sk_mc_lock);
- while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
- lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
+ rtnl_lock();
+ while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) {
struct net_device *dev;
np->ipv6_mc_list = mc_lst->next;
- spin_unlock(&ipv6_sk_mc_lock);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
+ dev = __dev_get_by_index(net, mc_lst->ifindex);
if (dev) {
struct inet6_dev *idev = __in6_dev_get(dev);
@@ -320,14 +302,12 @@ void ipv6_sock_mc_close(struct sock *sk)
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
- rcu_read_unlock();
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
- spin_lock(&ipv6_sk_mc_lock);
}
- spin_unlock(&ipv6_sk_mc_lock);
+ rtnl_unlock();
}
int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -390,7 +370,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (!psl)
goto done; /* err = -EADDRNOTAVAIL */
rv = !0;
- for (i=0; i<psl->sl_count; i++) {
+ for (i = 0; i < psl->sl_count; i++) {
rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
if (rv == 0)
break;
@@ -407,7 +387,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
/* update the interface filter */
ip6_mc_del_src(idev, group, omode, 1, source, 1);
- for (j=i+1; j<psl->sl_count; j++)
+ for (j = i+1; j < psl->sl_count; j++)
psl->sl_addr[j-1] = psl->sl_addr[j];
psl->sl_count--;
err = 0;
@@ -433,19 +413,19 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
newpsl->sl_max = count;
newpsl->sl_count = count - IP6_SFBLOCK;
if (psl) {
- for (i=0; i<psl->sl_count; i++)
+ for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
}
pmc->sflist = psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
- for (i=0; i<psl->sl_count; i++) {
+ for (i = 0; i < psl->sl_count; i++) {
rv = !ipv6_addr_equal(&psl->sl_addr[i], source);
if (rv == 0) /* There is an error in the address. */
goto done;
}
- for (j=psl->sl_count-1; j>=i; j--)
+ for (j = psl->sl_count-1; j >= i; j--)
psl->sl_addr[j+1] = psl->sl_addr[j];
psl->sl_addr[i] = *source;
psl->sl_count++;
@@ -514,7 +494,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
goto done;
}
newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
- for (i=0; i<newpsl->sl_count; ++i) {
+ for (i = 0; i < newpsl->sl_count; ++i) {
struct sockaddr_in6 *psin6;
psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
@@ -576,9 +556,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
}
err = -EADDRNOTAVAIL;
- /*
- * changes to the ipv6_mc_list require the socket lock and
- * a read lock on ip6_sk_mc_lock. We have the socket lock,
+ /* changes to the ipv6_mc_list require the socket lock and
+ * rtnl lock. We have the socket lock and rcu read lock,
* so reading the list is safe.
*/
@@ -602,11 +581,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
return -EFAULT;
}
- /* changes to psl require the socket lock, a read lock on
- * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
- * have the socket lock, so reading here is safe.
+ /* changes to psl require the socket lock, and a write lock
+ * on pmc->sflock. We have the socket lock so reading here is safe.
*/
- for (i=0; i<copycount; i++) {
+ for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
struct sockaddr_storage ss;
@@ -648,7 +626,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
} else {
int i;
- for (i=0; i<psl->sl_count; i++) {
+ for (i = 0; i < psl->sl_count; i++) {
if (ipv6_addr_equal(&psl->sl_addr[i], src_addr))
break;
}
@@ -663,14 +641,6 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
return rv;
}
-static void ma_put(struct ifmcaddr6 *mc)
-{
- if (atomic_dec_and_test(&mc->mca_refcnt)) {
- in6_dev_put(mc->idev);
- kfree(mc);
- }
-}
-
static void igmp6_group_added(struct ifmcaddr6 *mc)
{
struct net_device *dev = mc->idev->dev;
@@ -762,7 +732,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
pmc->mca_tomb = im->mca_tomb;
pmc->mca_sources = im->mca_sources;
im->mca_tomb = im->mca_sources = NULL;
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
psf->sf_crcount = pmc->mca_crcount;
}
spin_unlock_bh(&im->mca_lock);
@@ -780,7 +750,7 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
spin_lock_bh(&idev->mc_lock);
pmc_prev = NULL;
- for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) {
+ for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) {
if (ipv6_addr_equal(&pmc->mca_addr, pmca))
break;
pmc_prev = pmc;
@@ -794,7 +764,7 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca)
spin_unlock_bh(&idev->mc_lock);
if (pmc) {
- for (psf=pmc->mca_tomb; psf; psf=psf_next) {
+ for (psf = pmc->mca_tomb; psf; psf = psf_next) {
psf_next = psf->sf_next;
kfree(psf);
}
@@ -821,14 +791,14 @@ static void mld_clear_delrec(struct inet6_dev *idev)
/* clear dead sources, too */
read_lock_bh(&idev->lock);
- for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
struct ip6_sf_list *psf, *psf_next;
spin_lock_bh(&pmc->mca_lock);
psf = pmc->mca_tomb;
pmc->mca_tomb = NULL;
spin_unlock_bh(&pmc->mca_lock);
- for (; psf; psf=psf_next) {
+ for (; psf; psf = psf_next) {
psf_next = psf->sf_next;
kfree(psf);
}
@@ -836,6 +806,48 @@ static void mld_clear_delrec(struct inet6_dev *idev)
read_unlock_bh(&idev->lock);
}
+static void mca_get(struct ifmcaddr6 *mc)
+{
+ atomic_inc(&mc->mca_refcnt);
+}
+
+static void ma_put(struct ifmcaddr6 *mc)
+{
+ if (atomic_dec_and_test(&mc->mca_refcnt)) {
+ in6_dev_put(mc->idev);
+ kfree(mc);
+ }
+}
+
+static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
+ const struct in6_addr *addr)
+{
+ struct ifmcaddr6 *mc;
+
+ mc = kzalloc(sizeof(*mc), GFP_ATOMIC);
+ if (mc == NULL)
+ return NULL;
+
+ setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
+
+ mc->mca_addr = *addr;
+ mc->idev = idev; /* reference taken by caller */
+ mc->mca_users = 1;
+ /* mca_stamp should be updated upon changes */
+ mc->mca_cstamp = mc->mca_tstamp = jiffies;
+ atomic_set(&mc->mca_refcnt, 1);
+ spin_lock_init(&mc->mca_lock);
+
+ /* initial mode is (EX, empty) */
+ mc->mca_sfmode = MCAST_EXCLUDE;
+ mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+
+ if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
+ IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+ mc->mca_flags |= MAF_NOREPORT;
+
+ return mc;
+}
/*
* device multicast group inc (add if not found)
@@ -845,6 +857,8 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
+ ASSERT_RTNL();
+
/* we need to take a reference on idev */
idev = in6_dev_get(dev);
@@ -869,38 +883,20 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
}
}
- /*
- * not found: create a new one.
- */
-
- mc = kzalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
-
- if (mc == NULL) {
+ mc = mca_alloc(idev, addr);
+ if (!mc) {
write_unlock_bh(&idev->lock);
in6_dev_put(idev);
return -ENOMEM;
}
- setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
-
- mc->mca_addr = *addr;
- mc->idev = idev; /* (reference taken) */
- mc->mca_users = 1;
- /* mca_stamp should be updated upon changes */
- mc->mca_cstamp = mc->mca_tstamp = jiffies;
- atomic_set(&mc->mca_refcnt, 2);
- spin_lock_init(&mc->mca_lock);
-
- /* initial mode is (EX, empty) */
- mc->mca_sfmode = MCAST_EXCLUDE;
- mc->mca_sfcount[MCAST_EXCLUDE] = 1;
-
- if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
- IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
- mc->mca_flags |= MAF_NOREPORT;
-
mc->next = idev->mc_list;
idev->mc_list = mc;
+
+ /* Hold this for the code below before we unlock,
+ * it is already exposed via idev->mc_list.
+ */
+ mca_get(mc);
write_unlock_bh(&idev->lock);
mld_del_delrec(idev, &mc->mca_addr);
@@ -916,8 +912,10 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifmcaddr6 *ma, **map;
+ ASSERT_RTNL();
+
write_lock_bh(&idev->lock);
- for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) {
+ for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) {
if (ipv6_addr_equal(&ma->mca_addr, addr)) {
if (--ma->mca_users == 0) {
*map = ma->next;
@@ -942,7 +940,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
struct inet6_dev *idev;
int err;
- rcu_read_lock();
+ ASSERT_RTNL();
idev = __in6_dev_get(dev);
if (!idev)
@@ -950,7 +948,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
else
err = __ipv6_dev_mc_dec(idev, addr);
- rcu_read_unlock();
return err;
}
@@ -968,7 +965,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
idev = __in6_dev_get(dev);
if (idev) {
read_lock_bh(&idev->lock);
- for (mc = idev->mc_list; mc; mc=mc->next) {
+ for (mc = idev->mc_list; mc; mc = mc->next) {
if (ipv6_addr_equal(&mc->mca_addr, group))
break;
}
@@ -977,7 +974,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
struct ip6_sf_list *psf;
spin_lock_bh(&mc->mca_lock);
- for (psf=mc->mca_sources;psf;psf=psf->sf_next) {
+ for (psf = mc->mca_sources; psf; psf = psf->sf_next) {
if (ipv6_addr_equal(&psf->sf_addr, src_addr))
break;
}
@@ -986,7 +983,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
psf->sf_count[MCAST_EXCLUDE] !=
mc->mca_sfcount[MCAST_EXCLUDE];
else
- rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
+ rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0;
spin_unlock_bh(&mc->mca_lock);
} else
rv = true; /* don't filter unspecified source */
@@ -1077,10 +1074,10 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
int i, scount;
scount = 0;
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
if (scount == nsrcs)
break;
- for (i=0; i<nsrcs; i++) {
+ for (i = 0; i < nsrcs; i++) {
/* skip inactive filters */
if (psf->sf_count[MCAST_INCLUDE] ||
pmc->mca_sfcount[MCAST_EXCLUDE] !=
@@ -1110,10 +1107,10 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
/* mark INCLUDE-mode sources */
scount = 0;
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
if (scount == nsrcs)
break;
- for (i=0; i<nsrcs; i++) {
+ for (i = 0; i < nsrcs; i++) {
if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
psf->sf_gsresp = 1;
scount++;
@@ -1191,15 +1188,16 @@ static void mld_update_qrv(struct inet6_dev *idev,
* and SHOULD NOT be one. Catch this here if we ever run
* into such a case in future.
*/
+ const int min_qrv = min(MLD_QRV_DEFAULT, sysctl_mld_qrv);
WARN_ON(idev->mc_qrv == 0);
if (mlh2->mld2q_qrv > 0)
idev->mc_qrv = mlh2->mld2q_qrv;
- if (unlikely(idev->mc_qrv < 2)) {
+ if (unlikely(idev->mc_qrv < min_qrv)) {
net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
- idev->mc_qrv, MLD_QRV_DEFAULT);
- idev->mc_qrv = MLD_QRV_DEFAULT;
+ idev->mc_qrv, min_qrv);
+ idev->mc_qrv = min_qrv;
}
}
@@ -1239,7 +1237,7 @@ static void mld_update_qri(struct inet6_dev *idev,
}
static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
- unsigned long *max_delay)
+ unsigned long *max_delay, bool v1_query)
{
unsigned long mldv1_md;
@@ -1247,11 +1245,32 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
if (mld_in_v2_mode_only(idev))
return -EINVAL;
- /* MLDv1 router present */
mldv1_md = ntohs(mld->mld_maxdelay);
+
+ /* When in MLDv1 fallback and a MLDv2 router start-up being
+ * unaware of current MLDv1 operation, the MRC == MRD mapping
+ * only works when the exponential algorithm is not being
+ * used (as MLDv1 is unaware of such things).
+ *
+ * According to the RFC author, the MLDv2 implementations
+ * he's aware of all use a MRC < 32768 on start up queries.
+ *
+ * Thus, should we *ever* encounter something else larger
+ * than that, just assume the maximum possible within our
+ * reach.
+ */
+ if (!v1_query)
+ mldv1_md = min(mldv1_md, MLDV1_MRD_MAX_COMPAT);
+
*max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
- mld_set_v1_mode(idev);
+ /* MLDv1 router present: we need to go into v1 mode *only*
+ * when an MLDv1 query is received as per section 9.12. of
+ * RFC3810! And we know from RFC2710 section 3.7 that MLDv1
+ * queries MUST be of exactly 24 octets.
+ */
+ if (v1_query)
+ mld_set_v1_mode(idev);
/* cancel MLDv2 report timer */
mld_gq_stop_timer(idev);
@@ -1266,10 +1285,6 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
unsigned long *max_delay)
{
- /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
- if (mld_in_v1_mode(idev))
- return -EINVAL;
-
*max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
mld_update_qrv(idev, mld);
@@ -1326,8 +1341,11 @@ int igmp6_event_query(struct sk_buff *skb)
!(group_type&IPV6_ADDR_MULTICAST))
return -EINVAL;
- if (len == MLD_V1_QUERY_LEN) {
- err = mld_process_v1(idev, mld, &max_delay);
+ if (len < MLD_V1_QUERY_LEN) {
+ return -EINVAL;
+ } else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) {
+ err = mld_process_v1(idev, mld, &max_delay,
+ len == MLD_V1_QUERY_LEN);
if (err < 0)
return err;
} else if (len >= MLD_V2_QUERY_LEN_MIN) {
@@ -1359,18 +1377,19 @@ int igmp6_event_query(struct sk_buff *skb)
mlh2 = (struct mld2_query *)skb_transport_header(skb);
mark = 1;
}
- } else
+ } else {
return -EINVAL;
+ }
read_lock_bh(&idev->lock);
if (group_type == IPV6_ADDR_ANY) {
- for (ma = idev->mc_list; ma; ma=ma->next) {
+ for (ma = idev->mc_list; ma; ma = ma->next) {
spin_lock_bh(&ma->mca_lock);
igmp6_group_queried(ma, max_delay);
spin_unlock_bh(&ma->mca_lock);
}
} else {
- for (ma = idev->mc_list; ma; ma=ma->next) {
+ for (ma = idev->mc_list; ma; ma = ma->next) {
if (!ipv6_addr_equal(group, &ma->mca_addr))
continue;
spin_lock_bh(&ma->mca_lock);
@@ -1434,7 +1453,7 @@ int igmp6_event_report(struct sk_buff *skb)
*/
read_lock_bh(&idev->lock);
- for (ma = idev->mc_list; ma; ma=ma->next) {
+ for (ma = idev->mc_list; ma; ma = ma->next) {
if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
spin_lock(&ma->mca_lock);
if (del_timer(&ma->mca_timer))
@@ -1498,7 +1517,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
struct ip6_sf_list *psf;
int scount = 0;
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
if (!is_in(pmc, psf, type, gdeleted, sdeleted))
continue;
scount++;
@@ -1712,7 +1731,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
}
first = 1;
psf_prev = NULL;
- for (psf=*psf_list; psf; psf=psf_next) {
+ for (psf = *psf_list; psf; psf = psf_next) {
struct in6_addr *psrc;
psf_next = psf->sf_next;
@@ -1791,7 +1810,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
read_lock_bh(&idev->lock);
if (!pmc) {
- for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
if (pmc->mca_flags & MAF_NOREPORT)
continue;
spin_lock_bh(&pmc->mca_lock);
@@ -1824,7 +1843,7 @@ static void mld_clear_zeros(struct ip6_sf_list **ppsf)
struct ip6_sf_list *psf_prev, *psf_next, *psf;
psf_prev = NULL;
- for (psf=*ppsf; psf; psf = psf_next) {
+ for (psf = *ppsf; psf; psf = psf_next) {
psf_next = psf->sf_next;
if (psf->sf_crcount == 0) {
if (psf_prev)
@@ -1848,7 +1867,7 @@ static void mld_send_cr(struct inet6_dev *idev)
/* deleted MCA's */
pmc_prev = NULL;
- for (pmc=idev->mc_tomb; pmc; pmc=pmc_next) {
+ for (pmc = idev->mc_tomb; pmc; pmc = pmc_next) {
pmc_next = pmc->next;
if (pmc->mca_sfmode == MCAST_INCLUDE) {
type = MLD2_BLOCK_OLD_SOURCES;
@@ -1881,7 +1900,7 @@ static void mld_send_cr(struct inet6_dev *idev)
spin_unlock(&idev->mc_lock);
/* change recs */
- for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
spin_lock_bh(&pmc->mca_lock);
if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
type = MLD2_BLOCK_OLD_SOURCES;
@@ -2018,7 +2037,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
skb = NULL;
read_lock_bh(&idev->lock);
- for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
spin_lock_bh(&pmc->mca_lock);
if (pmc->mca_sfcount[MCAST_EXCLUDE])
type = MLD2_CHANGE_TO_EXCLUDE;
@@ -2063,7 +2082,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
int rv = 0;
psf_prev = NULL;
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
break;
psf_prev = psf;
@@ -2104,7 +2123,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
if (!idev)
return -ENODEV;
read_lock_bh(&idev->lock);
- for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
if (ipv6_addr_equal(pmca, &pmc->mca_addr))
break;
}
@@ -2124,7 +2143,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
pmc->mca_sfcount[sfmode]--;
}
err = 0;
- for (i=0; i<sfcount; i++) {
+ for (i = 0; i < sfcount; i++) {
int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
changerec |= rv > 0;
@@ -2140,7 +2159,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
pmc->mca_sfmode = MCAST_INCLUDE;
pmc->mca_crcount = idev->mc_qrv;
idev->mc_ifc_count = pmc->mca_crcount;
- for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
mld_ifc_event(pmc->idev);
} else if (sf_setstate(pmc) || changerec)
@@ -2159,7 +2178,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
struct ip6_sf_list *psf, *psf_prev;
psf_prev = NULL;
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
break;
psf_prev = psf;
@@ -2184,7 +2203,7 @@ static void sf_markstate(struct ifmcaddr6 *pmc)
struct ip6_sf_list *psf;
int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
psf->sf_oldin = mca_xcount ==
psf->sf_count[MCAST_EXCLUDE] &&
@@ -2201,7 +2220,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
int new_in, rv;
rv = 0;
- for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
!psf->sf_count[MCAST_INCLUDE];
@@ -2211,8 +2230,8 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
if (!psf->sf_oldin) {
struct ip6_sf_list *prev = NULL;
- for (dpsf=pmc->mca_tomb; dpsf;
- dpsf=dpsf->sf_next) {
+ for (dpsf = pmc->mca_tomb; dpsf;
+ dpsf = dpsf->sf_next) {
if (ipv6_addr_equal(&dpsf->sf_addr,
&psf->sf_addr))
break;
@@ -2234,7 +2253,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
* add or update "delete" records if an active filter
* is now inactive
*/
- for (dpsf=pmc->mca_tomb; dpsf; dpsf=dpsf->sf_next)
+ for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next)
if (ipv6_addr_equal(&dpsf->sf_addr,
&psf->sf_addr))
break;
@@ -2268,7 +2287,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
if (!idev)
return -ENODEV;
read_lock_bh(&idev->lock);
- for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
if (ipv6_addr_equal(pmca, &pmc->mca_addr))
break;
}
@@ -2284,7 +2303,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
if (!delta)
pmc->mca_sfcount[sfmode]++;
err = 0;
- for (i=0; i<sfcount; i++) {
+ for (i = 0; i < sfcount; i++) {
err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
if (err)
break;
@@ -2294,7 +2313,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
if (!delta)
pmc->mca_sfcount[sfmode]--;
- for (j=0; j<i; j++)
+ for (j = 0; j < i; j++)
ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
struct ip6_sf_list *psf;
@@ -2308,7 +2327,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
pmc->mca_crcount = idev->mc_qrv;
idev->mc_ifc_count = pmc->mca_crcount;
- for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+ for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
mld_ifc_event(idev);
} else if (sf_setstate(pmc))
@@ -2322,12 +2341,12 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
{
struct ip6_sf_list *psf, *nextpsf;
- for (psf=pmc->mca_tomb; psf; psf=nextpsf) {
+ for (psf = pmc->mca_tomb; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
pmc->mca_tomb = NULL;
- for (psf=pmc->mca_sources; psf; psf=nextpsf) {
+ for (psf = pmc->mca_sources; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
@@ -2366,7 +2385,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
{
int err;
- /* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
+ /* callers have the socket lock and rtnl lock
* so no other readers or writers of iml or its sflist
*/
if (!iml->sflist) {
@@ -2471,13 +2490,21 @@ void ipv6_mc_down(struct inet6_dev *idev)
mld_gq_stop_timer(idev);
mld_dad_stop_timer(idev);
- for (i = idev->mc_list; i; i=i->next)
+ for (i = idev->mc_list; i; i = i->next)
igmp6_group_dropped(i);
read_unlock_bh(&idev->lock);
mld_clear_delrec(idev);
}
+static void ipv6_mc_reset(struct inet6_dev *idev)
+{
+ idev->mc_qrv = sysctl_mld_qrv;
+ idev->mc_qi = MLD_QI_DEFAULT;
+ idev->mc_qri = MLD_QRI_DEFAULT;
+ idev->mc_v1_seen = 0;
+ idev->mc_maxdelay = unsolicited_report_interval(idev);
+}
/* Device going up */
@@ -2488,7 +2515,8 @@ void ipv6_mc_up(struct inet6_dev *idev)
/* Install multicast list, except for all-nodes (already installed) */
read_lock_bh(&idev->lock);
- for (i = idev->mc_list; i; i=i->next)
+ ipv6_mc_reset(idev);
+ for (i = idev->mc_list; i; i = i->next)
igmp6_group_added(i);
read_unlock_bh(&idev->lock);
}
@@ -2508,13 +2536,7 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
(unsigned long)idev);
setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
(unsigned long)idev);
-
- idev->mc_qrv = MLD_QRV_DEFAULT;
- idev->mc_qi = MLD_QI_DEFAULT;
- idev->mc_qri = MLD_QRI_DEFAULT;
-
- idev->mc_maxdelay = unsolicited_report_interval(idev);
- idev->mc_v1_seen = 0;
+ ipv6_mc_reset(idev);
write_unlock_bh(&idev->lock);
}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index db9b6cbc9db3..f61429d391d3 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -336,11 +336,10 @@ static void mip6_destopt_destroy(struct xfrm_state *x)
{
}
-static const struct xfrm_type mip6_destopt_type =
-{
+static const struct xfrm_type mip6_destopt_type = {
.description = "MIP6DESTOPT",
.owner = THIS_MODULE,
- .proto = IPPROTO_DSTOPTS,
+ .proto = IPPROTO_DSTOPTS,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
.init_state = mip6_destopt_init_state,
.destructor = mip6_destopt_destroy,
@@ -469,11 +468,10 @@ static void mip6_rthdr_destroy(struct xfrm_state *x)
{
}
-static const struct xfrm_type mip6_rthdr_type =
-{
+static const struct xfrm_type mip6_rthdr_type = {
.description = "MIP6RT",
.owner = THIS_MODULE,
- .proto = IPPROTO_ROUTING,
+ .proto = IPPROTO_ROUTING,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
.init_state = mip6_rthdr_init_state,
.destructor = mip6_rthdr_destroy,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ca8d4ea48a5d..4cb45c1079a2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -175,7 +175,7 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
type = cur->nd_opt_type;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
- } while(cur < end && cur->nd_opt_type != type);
+ } while (cur < end && cur->nd_opt_type != type);
return cur <= end && cur->nd_opt_type == type ? cur : NULL;
}
@@ -192,7 +192,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
return NULL;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
- } while(cur < end && !ndisc_is_useropt(cur));
+ } while (cur < end && !ndisc_is_useropt(cur));
return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
}
@@ -284,7 +284,6 @@ int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev,
}
return -EINVAL;
}
-
EXPORT_SYMBOL(ndisc_mc_map);
static u32 ndisc_hash(const void *pkey,
@@ -296,7 +295,7 @@ static u32 ndisc_hash(const void *pkey,
static int ndisc_constructor(struct neighbour *neigh)
{
- struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
+ struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
struct net_device *dev = neigh->dev;
struct inet6_dev *in6_dev;
struct neigh_parms *parms;
@@ -344,7 +343,7 @@ static int ndisc_constructor(struct neighbour *neigh)
static int pndisc_constructor(struct pneigh_entry *n)
{
- struct in6_addr *addr = (struct in6_addr*)&n->key;
+ struct in6_addr *addr = (struct in6_addr *)&n->key;
struct in6_addr maddr;
struct net_device *dev = n->dev;
@@ -357,7 +356,7 @@ static int pndisc_constructor(struct pneigh_entry *n)
static void pndisc_destructor(struct pneigh_entry *n)
{
- struct in6_addr *addr = (struct in6_addr*)&n->key;
+ struct in6_addr *addr = (struct in6_addr *)&n->key;
struct in6_addr maddr;
struct net_device *dev = n->dev;
@@ -1065,11 +1064,14 @@ static void ndisc_router_discovery(struct sk_buff *skb)
int optlen;
unsigned int pref = 0;
- __u8 * opt = (__u8 *)(ra_msg + 1);
+ __u8 *opt = (__u8 *)(ra_msg + 1);
optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
sizeof(struct ra_msg);
+ ND_PRINTK(2, info,
+ "RA: %s, dev: %s\n",
+ __func__, skb->dev->name);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn, "RA: source address is not link-local\n");
return;
@@ -1102,13 +1104,21 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- if (!ipv6_accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev)) {
+ ND_PRINTK(2, info,
+ "RA: %s, did not accept ra for dev: %s\n",
+ __func__, skb->dev->name);
goto skip_linkparms;
+ }
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific parameters from interior routers */
- if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
+ ND_PRINTK(2, info,
+ "RA: %s, nodetype is NODEFAULT, dev: %s\n",
+ __func__, skb->dev->name);
goto skip_linkparms;
+ }
#endif
if (in6_dev->if_flags & IF_RS_SENT) {
@@ -1130,11 +1140,24 @@ static void ndisc_router_discovery(struct sk_buff *skb)
(ra_msg->icmph.icmp6_addrconf_other ?
IF_RA_OTHERCONF : 0);
- if (!in6_dev->cnf.accept_ra_defrtr)
+ if (!in6_dev->cnf.accept_ra_defrtr) {
+ ND_PRINTK(2, info,
+ "RA: %s, defrtr is false for dev: %s\n",
+ __func__, skb->dev->name);
goto skip_defrtr;
+ }
- if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ /* Do not accept RA with source-addr found on local machine unless
+ * accept_ra_from_local is set to true.
+ */
+ if (!in6_dev->cnf.accept_ra_from_local &&
+ ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+ NULL, 0)) {
+ ND_PRINTK(2, info,
+ "RA from local address detected on dev: %s: default router ignored\n",
+ skb->dev->name);
goto skip_defrtr;
+ }
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
@@ -1163,8 +1186,10 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = NULL;
}
+ ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n",
+ rt, lifetime, skb->dev->name);
if (rt == NULL && lifetime) {
- ND_PRINTK(3, dbg, "RA: adding default router\n");
+ ND_PRINTK(3, info, "RA: adding default router\n");
rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
if (rt == NULL) {
@@ -1260,12 +1285,22 @@ skip_linkparms:
NEIGH_UPDATE_F_ISROUTER);
}
- if (!ipv6_accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev)) {
+ ND_PRINTK(2, info,
+ "RA: %s, accept_ra is false for dev: %s\n",
+ __func__, skb->dev->name);
goto out;
+ }
#ifdef CONFIG_IPV6_ROUTE_INFO
- if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ if (!in6_dev->cnf.accept_ra_from_local &&
+ ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+ NULL, 0)) {
+ ND_PRINTK(2, info,
+ "RA from local address detected on dev: %s: router info ignored.\n",
+ skb->dev->name);
goto skip_routeinfo;
+ }
if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
@@ -1283,7 +1318,7 @@ skip_linkparms:
continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
- rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
+ rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
&ipv6_hdr(skb)->saddr);
}
}
@@ -1293,8 +1328,12 @@ skip_routeinfo:
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific ndopts from interior routers */
- if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
+ ND_PRINTK(2, info,
+ "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
+ __func__, skb->dev->name);
goto out;
+ }
#endif
if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
@@ -1312,7 +1351,7 @@ skip_routeinfo:
__be32 n;
u32 mtu;
- memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
+ memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
mtu = ntohl(n);
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
@@ -1728,7 +1767,7 @@ int __init ndisc_init(void)
#ifdef CONFIG_SYSCTL
err = neigh_sysctl_register(NULL, &nd_tbl.parms,
- &ndisc_ifinfo_sysctl_change);
+ ndisc_ifinfo_sysctl_change);
if (err)
goto out_unregister_pernet;
out:
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4bff1f297e39..6af874fc187f 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -40,9 +40,35 @@ config NFT_CHAIN_ROUTE_IPV6
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
+config NF_REJECT_IPV6
+ tristate "IPv6 packet rejection"
+ default m if NETFILTER_ADVANCED=n
+
+config NFT_REJECT_IPV6
+ depends on NF_TABLES_IPV6
+ select NF_REJECT_IPV6
+ default NFT_REJECT
+ tristate
+
+config NF_LOG_IPV6
+ tristate "IPv6 packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
+config NF_NAT_IPV6
+ tristate "IPv6 NAT"
+ depends on NF_CONNTRACK_IPV6
+ depends on NETFILTER_ADVANCED
+ select NF_NAT
+ help
+ The IPv6 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. This can be
+ controlled by iptables or nft.
+
+if NF_NAT_IPV6
+
config NFT_CHAIN_NAT_IPV6
depends on NF_TABLES_IPV6
- depends on NF_NAT_IPV6 && NFT_NAT
tristate "IPv6 nf_tables nat chain support"
help
This option enables the "nat" chain for IPv6 in nf_tables. This
@@ -50,10 +76,22 @@ config NFT_CHAIN_NAT_IPV6
packet transformations such as the source, destination address and
source and destination ports.
-config NFT_REJECT_IPV6
+config NF_NAT_MASQUERADE_IPV6
+ tristate "IPv6 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection) for IPv6.
+
+config NFT_MASQ_IPV6
+ tristate "IPv6 masquerade support for nf_tables"
depends on NF_TABLES_IPV6
- default NFT_REJECT
- tristate
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV6
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
+endif # NF_NAT_IPV6
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
@@ -175,6 +213,7 @@ config IP6_NF_FILTER
config IP6_NF_TARGET_REJECT
tristate "REJECT target support"
depends on IP6_NF_FILTER
+ select NF_REJECT_IPV6
default m if NETFILTER_ADVANCED=n
help
The REJECT target allows a filtering rule to specify that an ICMPv6
@@ -227,22 +266,25 @@ config IP6_NF_SECURITY
If unsure, say N.
-config NF_NAT_IPV6
- tristate "IPv6 NAT"
+config IP6_NF_NAT
+ tristate "ip6tables NAT support"
depends on NF_CONNTRACK_IPV6
depends on NETFILTER_ADVANCED
select NF_NAT
+ select NF_NAT_IPV6
+ select NETFILTER_XT_NAT
help
- The IPv6 NAT option allows masquerading, port forwarding and other
- forms of full Network Address Port Translation. It is controlled by
- the `nat' table in ip6tables, see the man page for ip6tables(8).
+ This enables the `nat' table in ip6tables. This allows masquerading,
+ port forwarding and other forms of full Network Address Port
+ Translation.
To compile it as a module, choose M here. If unsure, say N.
-if NF_NAT_IPV6
+if IP6_NF_NAT
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
+ select NF_NAT_MASQUERADE_IPV6
help
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
@@ -260,7 +302,7 @@ config IP6_NF_TARGET_NPT
To compile it as a module, choose M here. If unsure, say N.
-endif # NF_NAT_IPV6
+endif # IP6_NF_NAT
endif # IP6_NF_IPTABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 70d3dd66f2cd..fbb25f01143c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
-obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
+obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -18,16 +18,24 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
+obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
# defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+# logging
+obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
+
+# reject
+obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
+
# nf_tables
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
+obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 3e4e92d5e157..7f9f45d829d2 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -19,33 +19,12 @@
#include <net/netfilter/nf_nat.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
static unsigned int
masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct nf_nat_range *range = par->targinfo;
- enum ip_conntrack_info ctinfo;
- struct in6_addr src;
- struct nf_conn *ct;
- struct nf_nat_range newrange;
-
- ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
-
- if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
- &ipv6_hdr(skb)->daddr, 0, &src) < 0)
- return NF_DROP;
-
- nfct_nat(ct)->masq_index = par->out->ifindex;
-
- newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
- newrange.min_addr.in6 = src;
- newrange.max_addr.in6 = src;
- newrange.min_proto = range->min_proto;
- newrange.max_proto = range->max_proto;
-
- return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+ return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out);
}
static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -57,48 +36,6 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
return 0;
}
-static int device_cmp(struct nf_conn *ct, void *ifindex)
-{
- const struct nf_conn_nat *nat = nfct_nat(ct);
-
- if (!nat)
- return 0;
- if (nf_ct_l3num(ct) != NFPROTO_IPV6)
- return 0;
- return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net *net = dev_net(dev);
-
- if (event == NETDEV_DOWN)
- nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_dev_notifier = {
- .notifier_call = masq_device_event,
-};
-
-static int masq_inet_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct inet6_ifaddr *ifa = ptr;
- struct netdev_notifier_info info;
-
- netdev_notifier_info_init(&info, ifa->idev->dev);
- return masq_device_event(this, event, &info);
-}
-
-static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
-};
-
static struct xt_target masquerade_tg6_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV6,
@@ -115,17 +52,14 @@ static int __init masquerade_tg6_init(void)
int err;
err = xt_register_target(&masquerade_tg6_reg);
- if (err == 0) {
- register_netdevice_notifier(&masq_dev_notifier);
- register_inet6addr_notifier(&masq_inet_notifier);
- }
+ if (err == 0)
+ nf_nat_masquerade_ipv6_register_notifier();
return err;
}
static void __exit masquerade_tg6_exit(void)
{
- unregister_inet6addr_notifier(&masq_inet_notifier);
- unregister_netdevice_notifier(&masq_dev_notifier);
+ nf_nat_masquerade_ipv6_unregister_notifier();
xt_unregister_target(&masquerade_tg6_reg);
}
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 54bd9790603f..8b147440fbdc 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -94,7 +94,6 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
break;
default:
return false;
- break;
}
nexthdr = hp->nexthdr;
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 387d8b8fc18d..b0634ac996b7 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -30,222 +30,57 @@ static const struct xt_table nf_nat_ipv6_table = {
.af = NFPROTO_IPV6,
};
-static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- */
- struct nf_nat_range range;
-
- range.flags = 0;
- pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
- HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
-
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
-static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct nf_conn *ct)
+static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
- unsigned int ret;
- ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
- if (ret == NF_ACCEPT) {
- if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
- ret = alloc_null_binding(ct, hooknum);
- }
- return ret;
+ return ip6t_do_table(skb, ops->hooknum, in, out, net->ipv6.ip6table_nat);
}
-static unsigned int
-nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
- __be16 frag_off;
- int hdrlen;
- u8 nexthdr;
-
- ct = nf_ct_get(skb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
- * have dropped it. Hence it's the user's responsibilty to
- * packet filter it out, or implement conntrack/NAT for that
- * protocol. 8) --RR
- */
- if (!ct)
- return NF_ACCEPT;
-
- /* Don't try to NAT if this packet is not conntracked */
- if (nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- nexthdr = ipv6_hdr(skb)->nexthdr;
- hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- ops->hooknum,
- hdrlen))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- unsigned int ret;
-
- ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
- if (ret != NF_ACCEPT)
- return ret;
- } else {
- pr_debug("Already setup manip %s for ct %p\n",
- maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
- ct);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
- goto oif_changed;
- }
- break;
-
- default:
- /* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
- goto oif_changed;
- }
-
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
-
-oif_changed:
- nf_ct_kill_acct(ct, ctinfo, skb);
- return NF_DROP;
+ return nf_nat_ipv6_fn(ops, skb, in, out, ip6table_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv6_in(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- unsigned int ret;
- struct in6_addr daddr = ipv6_hdr(skb)->daddr;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
- skb_dst_drop(skb);
-
- return ret;
+ return nf_nat_ipv6_in(ops, skb, in, out, ip6table_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv6_out(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
-#ifdef CONFIG_XFRM
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- int err;
-#endif
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
- &ct->tuplehash[!dir].tuple.dst.u3) ||
- (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
- ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all)) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
- }
-#endif
- return ret;
+ return nf_nat_ipv6_out(ops, skb, in, out, ip6table_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
- int err;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
- &ct->tuplehash[!dir].tuple.src.u3)) {
- err = ip6_route_me_harder(skb);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#ifdef CONFIG_XFRM
- else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
- ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#endif
- }
- return ret;
+ return nf_nat_ipv6_local_fn(ops, skb, in, out, ip6table_nat_do_chain);
}
static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
- .hook = nf_nat_ipv6_in,
+ .hook = ip6table_nat_in,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
@@ -253,7 +88,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv6_out,
+ .hook = ip6table_nat_out,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
@@ -261,7 +96,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
},
/* Before packet filtering, change destination */
{
- .hook = nf_nat_ipv6_local_fn,
+ .hook = ip6table_nat_local_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
@@ -269,7 +104,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv6_fn,
+ .hook = ip6table_nat_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0d5279fd852a..6f187c8d8a1b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -50,6 +50,7 @@
#include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+static const char nf_frags_cache_name[] = "nf-frags";
struct nf_ct_frag6_skb_cb
{
@@ -63,6 +64,8 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
+static int zero;
+
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
@@ -76,14 +79,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
};
@@ -102,7 +108,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
table[0].data = &net->nf_frag.frags.timeout;
table[1].data = &net->nf_frag.frags.low_thresh;
+ table[1].extra2 = &net->nf_frag.frags.high_thresh;
table[2].data = &net->nf_frag.frags.high_thresh;
+ table[2].extra1 = &net->nf_frag.frags.low_thresh;
+ table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
}
hdr = register_net_sysctl(net, "net/netfilter", table);
@@ -147,16 +156,13 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- u32 c;
-
net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
- return c & (INETFRAGS_HASHSZ - 1);
+ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, nf_frags.rnd);
}
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
+static unsigned int nf_hashfn(const struct inet_frag_queue *q)
{
const struct frag_queue *nq;
@@ -196,7 +202,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.dst = dst;
arg.ecn = ecn;
- read_lock_bh(&nf_frags.lock);
+ local_bh_disable();
hash = nf_hash_frag(id, src, dst);
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
@@ -217,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
int offset, end;
u8 ecn;
- if (fq->q.last_in & INET_FRAG_COMPLETE) {
+ if (fq->q.flags & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n");
goto err;
}
@@ -248,11 +254,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) {
+ ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n");
goto err;
}
- fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -267,7 +273,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & INET_FRAG_LAST_IN) {
+ if (fq->q.flags & INET_FRAG_LAST_IN) {
pr_debug("last packet already reached.\n");
goto err;
}
@@ -349,10 +355,9 @@ found:
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= INET_FRAG_FIRST_IN;
+ fq->q.flags |= INET_FRAG_FIRST_IN;
}
- inet_frag_lru_move(&fq->q);
return 0;
discard_fq:
@@ -597,10 +602,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(clone);
fhdr = (struct frag_hdr *)skb_transport_header(clone);
- local_bh_disable();
- inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
- local_bh_enable();
-
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
ip6_frag_ecn(hdr));
if (fq == NULL) {
@@ -617,7 +618,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
goto ret_orig;
}
- if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
@@ -677,13 +678,15 @@ int nf_ct_frag6_init(void)
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
- nf_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&nf_frags);
-
+ nf_frags.frags_cache_name = nf_frags_cache_name;
+ ret = inet_frags_init(&nf_frags);
+ if (ret)
+ goto out;
ret = register_pernet_subsys(&nf_ct_net_ops);
if (ret)
inet_frags_fini(&nf_frags);
+out:
return ret;
}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 7b9a748c6bac..e70382e4dfb5 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -40,7 +40,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge &&
skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
new file mode 100644
index 000000000000..7b17a0be93e7
--- /dev/null
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -0,0 +1,417 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 5,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* One level of recursion won't kill us */
+static void dump_ipv6_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int ip6hoff,
+ int recurse)
+{
+ u_int8_t currenthdr;
+ int fragment;
+ struct ipv6hdr _ip6h;
+ const struct ipv6hdr *ih;
+ unsigned int ptr;
+ unsigned int hdrlen = 0;
+ unsigned int logflags;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_MASK;
+
+ ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
+ if (ih == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
+ nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
+
+ /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
+ nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+ ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
+ (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
+ ih->hop_limit,
+ (ntohl(*(__be32 *)ih) & 0x000fffff));
+
+ fragment = 0;
+ ptr = ip6hoff + sizeof(struct ipv6hdr);
+ currenthdr = ih->nexthdr;
+ while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+ struct ipv6_opt_hdr _hdr;
+ const struct ipv6_opt_hdr *hp;
+
+ hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+ if (hp == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 48 "OPT (...) " */
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, "OPT ( ");
+
+ switch (currenthdr) {
+ case IPPROTO_FRAGMENT: {
+ struct frag_hdr _fhdr;
+ const struct frag_hdr *fh;
+
+ nf_log_buf_add(m, "FRAG:");
+ fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
+ &_fhdr);
+ if (fh == NULL) {
+ nf_log_buf_add(m, "TRUNCATED ");
+ return;
+ }
+
+ /* Max length: 6 "65535 " */
+ nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
+
+ /* Max length: 11 "INCOMPLETE " */
+ if (fh->frag_off & htons(0x0001))
+ nf_log_buf_add(m, "INCOMPLETE ");
+
+ nf_log_buf_add(m, "ID:%08x ",
+ ntohl(fh->identification));
+
+ if (ntohs(fh->frag_off) & 0xFFF8)
+ fragment = 1;
+
+ hdrlen = 8;
+
+ break;
+ }
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_HOPOPTS:
+ if (fragment) {
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, ")");
+ return;
+ }
+ hdrlen = ipv6_optlen(hp);
+ break;
+ /* Max Length */
+ case IPPROTO_AH:
+ if (logflags & XT_LOG_IPOPT) {
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+
+ /* Max length: 3 "AH " */
+ nf_log_buf_add(m, "AH ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
+ &_ahdr);
+ if (ah == NULL) {
+ /*
+ * Max length: 26 "INCOMPLETE [65535
+ * bytes] )"
+ */
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+
+ }
+
+ hdrlen = (hp->hdrlen+2)<<2;
+ break;
+ case IPPROTO_ESP:
+ if (logflags & XT_LOG_IPOPT) {
+ struct ip_esp_hdr _esph;
+ const struct ip_esp_hdr *eh;
+
+ /* Max length: 4 "ESP " */
+ nf_log_buf_add(m, "ESP ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ /*
+ * Max length: 26 "INCOMPLETE [65535 bytes] )"
+ */
+ eh = skb_header_pointer(skb, ptr, sizeof(_esph),
+ &_esph);
+ if (eh == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Length: 16 "SPI=0xF1234567 )" */
+ nf_log_buf_add(m, "SPI=0x%x )",
+ ntohl(eh->spi));
+ }
+ return;
+ default:
+ /* Max length: 20 "Unknown Ext Hdr 255" */
+ nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
+ return;
+ }
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, ") ");
+
+ currenthdr = hp->nexthdr;
+ ptr += hdrlen;
+ }
+
+ switch (currenthdr) {
+ case IPPROTO_TCP:
+ if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment,
+ ptr, logflags))
+ return;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr))
+ return;
+ break;
+ case IPPROTO_ICMPV6: {
+ struct icmp6hdr _icmp6h;
+ const struct icmp6hdr *ic;
+
+ /* Max length: 13 "PROTO=ICMPv6 " */
+ nf_log_buf_add(m, "PROTO=ICMPv6 ");
+
+ if (fragment)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
+ if (ic == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ nf_log_buf_add(m, "TYPE=%u CODE=%u ",
+ ic->icmp6_type, ic->icmp6_code);
+
+ switch (ic->icmp6_type) {
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ nf_log_buf_add(m, "ID=%u SEQ=%u ",
+ ntohs(ic->icmp6_identifier),
+ ntohs(ic->icmp6_sequence));
+ break;
+ case ICMPV6_MGM_QUERY:
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MGM_REDUCTION:
+ break;
+
+ case ICMPV6_PARAMPROB:
+ /* Max length: 17 "POINTER=ffffffff " */
+ nf_log_buf_add(m, "POINTER=%08x ",
+ ntohl(ic->icmp6_pointer));
+ /* Fall through */
+ case ICMPV6_DEST_UNREACH:
+ case ICMPV6_PKT_TOOBIG:
+ case ICMPV6_TIME_EXCEED:
+ /* Max length: 3+maxlen */
+ if (recurse) {
+ nf_log_buf_add(m, "[");
+ dump_ipv6_packet(m, info, skb,
+ ptr + sizeof(_icmp6h), 0);
+ nf_log_buf_add(m, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) {
+ nf_log_buf_add(m, "MTU=%u ",
+ ntohl(ic->icmp6_mtu));
+ }
+ }
+ break;
+ }
+ /* Max length: 10 "PROTO=255 " */
+ default:
+ nf_log_buf_add(m, "PROTO=%u ", currenthdr);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & XT_LOG_UID) && recurse)
+ nf_log_dump_sk_uid_gid(m, skb->sk);
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (recurse && skb->mark)
+ nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+}
+
+static void dump_ipv6_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ unsigned int logflags = 0;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+
+ if (!(logflags & XT_LOG_MACDECODE))
+ goto fallback;
+
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ ntohs(eth_hdr(skb)->h_proto));
+ return;
+ default:
+ break;
+ }
+
+fallback:
+ nf_log_buf_add(m, "MAC=");
+ if (dev->hard_header_len &&
+ skb->mac_header != skb->network_header) {
+ const unsigned char *p = skb_mac_header(skb);
+ unsigned int len = dev->hard_header_len;
+ unsigned int i;
+
+ if (dev->type == ARPHRD_SIT) {
+ p -= ETH_HLEN;
+
+ if (p < skb->head)
+ p = NULL;
+ }
+
+ if (p != NULL) {
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < len; i++)
+ nf_log_buf_add(m, ":%02x", *p++);
+ }
+ nf_log_buf_add(m, " ");
+
+ if (dev->type == ARPHRD_SIT) {
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
+ nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
+ &iph->daddr);
+ }
+ } else {
+ nf_log_buf_add(m, " ");
+ }
+}
+
+static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out,
+ loginfo, prefix);
+
+ if (in != NULL)
+ dump_ipv6_mac_header(m, loginfo, skb);
+
+ dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip6_logger __read_mostly = {
+ .name = "nf_log_ipv6",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_ip6_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_ipv6_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_ipv6_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_ip6_logger);
+}
+
+static struct pernet_operations nf_log_ipv6_net_ops = {
+ .init = nf_log_ipv6_net_init,
+ .exit = nf_log_ipv6_net_exit,
+};
+
+static int __init nf_log_ipv6_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_log_ipv6_net_ops);
+ if (ret < 0)
+ return ret;
+
+ nf_log_register(NFPROTO_IPV6, &nf_ip6_logger);
+ return 0;
+}
+
+static void __exit nf_log_ipv6_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_ipv6_net_ops);
+ nf_log_unregister(&nf_ip6_logger);
+}
+
+module_init(nf_log_ipv6_init);
+module_exit(nf_log_ipv6_exit);
+
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter IPv4 packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index abfe75a2e316..c5812e1c1ffb 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -158,6 +158,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
htons(oldlen), htons(datalen), 1);
}
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
{
@@ -175,6 +176,7 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
return 0;
}
+#endif
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.l3proto = NFPROTO_IPV6,
@@ -183,7 +185,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.manip_pkt = nf_nat_ipv6_manip_pkt,
.csum_update = nf_nat_ipv6_csum_update,
.csum_recalc = nf_nat_ipv6_csum_recalc,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
+#endif
#ifdef CONFIG_XFRM
.decode_session = nf_nat_ipv6_decode_session,
#endif
@@ -257,6 +261,205 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+unsigned int
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ ops->hooknum,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = do_chain(ops, skb, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
+ break;
+
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn);
+
+unsigned int
+nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ unsigned int ret;
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_in);
+
+unsigned int
+nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_out);
+
+unsigned int
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ err = ip6_route_me_harder(skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_local_fn);
+
static int __init nf_nat_l3proto_ipv6_init(void)
{
int err;
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
new file mode 100644
index 000000000000..7745609665cd
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+ const struct net_device *out)
+{
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr src;
+ struct nf_conn *ct;
+ struct nf_nat_range newrange;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ if (ipv6_dev_get_saddr(dev_net(out), out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ return NF_DROP;
+
+ nfct_nat(ct)->masq_index = out->ifindex;
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = src;
+ newrange.max_addr.in6 = src;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
+
+static int device_cmp(struct nf_conn *ct, void *ifindex)
+{
+ const struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (!nat)
+ return 0;
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN)
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, ifa->idev->dev);
+ return masq_device_event(this, event, &info);
+}
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+
+void nf_nat_masquerade_ipv6_register_notifier(void)
+{
+ /* check if the notifier is already set */
+ if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
+ return;
+
+ register_netdevice_notifier(&masq_dev_notifier);
+ register_inet6addr_notifier(&masq_inet_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
+
+void nf_nat_masquerade_ipv6_unregister_notifier(void)
+{
+ /* check if the notifier still has clients */
+ if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
+ return;
+
+ unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_netdevice_notifier(&masq_dev_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
new file mode 100644
index 000000000000..5f5f0438d74d
--- /dev/null
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -0,0 +1,163 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_checksum.h>
+#include <linux/netfilter_ipv6.h>
+
+void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
+{
+ struct sk_buff *nskb;
+ struct tcphdr otcph, *tcph;
+ unsigned int otcplen, hh_len;
+ int tcphoff, needs_ack;
+ const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
+ struct ipv6hdr *ip6h;
+#define DEFAULT_TOS_VALUE 0x0U
+ const __u8 tclass = DEFAULT_TOS_VALUE;
+ struct dst_entry *dst = NULL;
+ u8 proto;
+ __be16 frag_off;
+ struct flowi6 fl6;
+
+ if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
+ (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
+ pr_debug("addr is not unicast.\n");
+ return;
+ }
+
+ proto = oip6h->nexthdr;
+ tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
+
+ if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
+ pr_debug("Cannot get TCP header.\n");
+ return;
+ }
+
+ otcplen = oldskb->len - tcphoff;
+
+ /* IP header checks: fragment, too short. */
+ if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
+ pr_debug("proto(%d) != IPPROTO_TCP, "
+ "or too short. otcplen = %d\n",
+ proto, otcplen);
+ return;
+ }
+
+ if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
+ BUG();
+
+ /* No RST for RST. */
+ if (otcph.rst) {
+ pr_debug("RST is set\n");
+ return;
+ }
+
+ /* Check checksum. */
+ if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
+ pr_debug("TCP checksum is invalid\n");
+ return;
+ }
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_TCP;
+ fl6.saddr = oip6h->daddr;
+ fl6.daddr = oip6h->saddr;
+ fl6.fl6_sport = otcph.dest;
+ fl6.fl6_dport = otcph.source;
+ security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst == NULL || dst->error) {
+ dst_release(dst);
+ return;
+ }
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst))
+ return;
+
+ hh_len = (dst->dev->hard_header_len + 15)&~15;
+ nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
+ + sizeof(struct tcphdr) + dst->trailer_len,
+ GFP_ATOMIC);
+
+ if (!nskb) {
+ net_dbg_ratelimited("cannot alloc skb\n");
+ dst_release(dst);
+ return;
+ }
+
+ skb_dst_set(nskb, dst);
+
+ skb_reserve(nskb, hh_len + dst->header_len);
+
+ skb_put(nskb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(nskb);
+ ip6h = ipv6_hdr(nskb);
+ ip6_flow_hdr(ip6h, tclass, 0);
+ ip6h->hop_limit = ip6_dst_hoplimit(dst);
+ ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->saddr = oip6h->daddr;
+ ip6h->daddr = oip6h->saddr;
+
+ skb_reset_transport_header(nskb);
+ tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+ /* Truncate to length (no data) */
+ tcph->doff = sizeof(struct tcphdr)/4;
+ tcph->source = otcph.dest;
+ tcph->dest = otcph.source;
+
+ if (otcph.ack) {
+ needs_ack = 0;
+ tcph->seq = otcph.ack_seq;
+ tcph->ack_seq = 0;
+ } else {
+ needs_ack = 1;
+ tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
+ + otcplen - (otcph.doff<<2));
+ tcph->seq = 0;
+ }
+
+ /* Reset flags */
+ ((u_int8_t *)tcph)[13] = 0;
+ tcph->rst = 1;
+ tcph->ack = needs_ack;
+ tcph->window = 0;
+ tcph->urg_ptr = 0;
+ tcph->check = 0;
+
+ /* Adjust TCP checksum */
+ tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
+ &ipv6_hdr(nskb)->daddr,
+ sizeof(struct tcphdr), IPPROTO_TCP,
+ csum_partial(tcph,
+ sizeof(struct tcphdr), 0));
+
+ nf_ct_attach(nskb, oldskb);
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ /* If we use ip6_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+ nskb->dev = oldskb->nf_bridge->physindev;
+ nskb->protocol = htons(ETH_P_IPV6);
+ ip6h->payload_len = htons(sizeof(struct tcphdr));
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ return;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip6_local_out(nskb);
+}
+EXPORT_SYMBOL_GPL(nf_send_reset6);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index d189fcb437fe..1c4b75dd425b 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -24,144 +24,53 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/ipv6.h>
-/*
- * IPv6 NAT chains
- */
-
-static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
- __be16 frag_off;
- int hdrlen;
- u8 nexthdr;
struct nft_pktinfo pkt;
- unsigned int ret;
-
- if (ct == NULL || nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED + IP_CT_IS_REPLY:
- nexthdr = ipv6_hdr(skb)->nexthdr;
- hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- ops->hooknum,
- hdrlen))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall through */
- case IP_CT_NEW:
- if (nf_nat_initialized(ct, maniptype))
- break;
-
- nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
- ret = nft_do_chain(&pkt, ops);
- if (ret != NF_ACCEPT)
- return ret;
- if (!nf_nat_initialized(ct, maniptype)) {
- ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
- if (ret != NF_ACCEPT)
- return ret;
- }
- default:
- break;
- }
+ nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+ return nft_do_chain(&pkt, ops);
}
-static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- struct in6_addr daddr = ipv6_hdr(skb)->daddr;
- unsigned int ret;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
- skb_dst_drop(skb);
-
- return ret;
+ return nf_nat_ipv6_fn(ops, skb, in, out, nft_nat_do_chain);
}
-static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- enum ip_conntrack_info ctinfo __maybe_unused;
- const struct nf_conn *ct __maybe_unused;
- unsigned int ret;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
- &ct->tuplehash[!dir].tuple.dst.u3) ||
- (ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all))
- if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
- ret = NF_DROP;
- }
-#endif
- return ret;
+ return nf_nat_ipv6_in(ops, skb, in, out, nft_nat_do_chain);
}
-static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- enum ip_conntrack_info ctinfo;
- const struct nf_conn *ct;
- unsigned int ret;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ return nf_nat_ipv6_out(ops, skb, in, out, nft_nat_do_chain);
+}
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
- &ct->tuplehash[!dir].tuple.src.u3)) {
- if (ip6_route_me_harder(skb))
- ret = NF_DROP;
- }
-#ifdef CONFIG_XFRM
- else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (nf_xfrm_me_harder(skb, AF_INET6))
- ret = NF_DROP;
-#endif
- }
- return ret;
+static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return nf_nat_ipv6_local_fn(ops, skb, in, out, nft_nat_do_chain);
}
static const struct nf_chain_type nft_chain_nat_ipv6 = {
@@ -174,10 +83,10 @@ static const struct nf_chain_type nft_chain_nat_ipv6 = {
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_LOCAL_IN),
.hooks = {
- [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting,
- [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting,
- [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output,
- [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn,
+ [NF_INET_PRE_ROUTING] = nft_nat_ipv6_in,
+ [NF_INET_POST_ROUTING] = nft_nat_ipv6_out,
+ [NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn,
+ [NF_INET_LOCAL_IN] = nft_nat_ipv6_fn,
},
};
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
new file mode 100644
index 000000000000..556262f40761
--- /dev/null
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+
+static void nft_masq_ipv6_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range range;
+ unsigned int verdict;
+
+ range.flags = priv->flags;
+
+ verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
+
+ data[NFT_REG_VERDICT].verdict = verdict;
+}
+
+static struct nft_expr_type nft_masq_ipv6_type;
+static const struct nft_expr_ops nft_masq_ipv6_ops = {
+ .type = &nft_masq_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv6_eval,
+ .init = nft_masq_init,
+ .dump = nft_masq_dump,
+};
+
+static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "masq",
+ .ops = &nft_masq_ipv6_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_masq_ipv6_module_init(void)
+{
+ int ret;
+
+ ret = nft_register_expr(&nft_masq_ipv6_type);
+ if (ret < 0)
+ return ret;
+
+ nf_nat_masquerade_ipv6_register_notifier();
+
+ return ret;
+}
+
+static void __exit nft_masq_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_masq_ipv6_type);
+ nf_nat_masquerade_ipv6_unregister_notifier();
+}
+
+module_init(nft_masq_ipv6_module_init);
+module_exit(nft_masq_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 5ec867e4a8b7..fc24c390af05 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -35,7 +35,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
if (found_rhdr)
return offset;
break;
- default :
+ default:
return offset;
}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 3317440ea341..1752cd0b4882 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -8,7 +8,7 @@
* except it reports the sockets in the INET6 address family.
*
* Authors: David S. Miller (davem@caip.rutgers.edu)
- * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -33,6 +33,7 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
+ unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -42,8 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues(net), ip6_frag_mem(net));
+ seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
return 0;
}
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e048cf1bb6a2..e3770abe688a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -51,6 +51,7 @@ EXPORT_SYMBOL(inet6_del_protocol);
#endif
const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_offloads);
int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
{
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b2dc60b0c764..896af8807979 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -176,7 +176,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
goto out;
net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
+ sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
while (sk) {
int filtered;
@@ -220,7 +220,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
}
}
sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
}
out:
read_unlock(&raw_v6_hashinfo.lock);
@@ -375,7 +375,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
net = dev_net(skb->dev);
while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- IP6CB(skb)->iif))) {
+ inet6_iif(skb)))) {
rawv6_err(sk, skb, NULL, type, code,
inner_offset, info);
sk = sk_next(sk);
@@ -506,7 +506,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
*addr_len = sizeof(*sin6);
}
@@ -588,8 +588,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
}
offset += skb_transport_offset(skb);
- if (skb_copy_bits(skb, offset, &csum, 2))
- BUG();
+ BUG_ON(skb_copy_bits(skb, offset, &csum, 2));
/* in case cksum was not initialized */
if (unlikely(csum))
@@ -601,8 +600,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
csum = CSUM_MANGLED_0;
- if (skb_store_bits(skb, offset, &csum, 2))
- BUG();
+ BUG_ON(skb_store_bits(skb, offset, &csum, 2));
send:
err = ip6_push_pending_frames(sk);
@@ -891,7 +889,7 @@ back_from_confirm:
else {
lock_sock(sk);
err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
- len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst,
+ len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
msg->msg_flags, dontfrag);
if (err)
@@ -904,7 +902,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
- return err<0?err:len;
+ return err < 0 ? err : len;
do_confirm:
dst_confirm(dst);
if (!(msg->msg_flags & MSG_PROBE) || len)
@@ -1047,7 +1045,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
struct raw6_sock *rp = raw6_sk(sk);
int val, len;
- if (get_user(len,optlen))
+ if (get_user(len, optlen))
return -EFAULT;
switch (optname) {
@@ -1071,7 +1069,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval,&val,len))
+ if (copy_to_user(optval, &val, len))
return -EFAULT;
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index cc85a9ba5010..1a157ca2ebc1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -60,13 +60,14 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-struct ip6frag_skb_cb
-{
+static const char ip6_frag_cache_name[] = "ip6-frags";
+
+struct ip6frag_skb_cb {
struct inet6_skb_parm h;
int offset;
};
-#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
+#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb))
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
@@ -85,27 +86,23 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- u32 c;
-
net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-
- return c & (INETFRAGS_HASHSZ - 1);
+ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, ip6_frags.rnd);
}
-static unsigned int ip6_hashfn(struct inet_frag_queue *q)
+static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
{
- struct frag_queue *fq;
+ const struct frag_queue *fq;
fq = container_of(q, struct frag_queue, q);
return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
}
-bool ip6_frag_match(struct inet_frag_queue *q, void *a)
+bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
{
- struct frag_queue *fq;
- struct ip6_create_arg *arg = a;
+ const struct frag_queue *fq;
+ const struct ip6_create_arg *arg = a;
fq = container_of(q, struct frag_queue, q);
return fq->id == arg->id &&
@@ -115,10 +112,10 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a)
}
EXPORT_SYMBOL(ip6_frag_match);
-void ip6_frag_init(struct inet_frag_queue *q, void *a)
+void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- struct ip6_create_arg *arg = a;
+ const struct ip6_create_arg *arg = a;
fq->id = arg->id;
fq->user = arg->user;
@@ -135,7 +132,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
spin_lock(&fq->q.lock);
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q, frags);
@@ -145,17 +142,20 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
if (!dev)
goto out_rcu_unlock;
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ if (fq->q.flags & INET_FRAG_EVICTED)
+ goto out_rcu_unlock;
+
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
goto out_rcu_unlock;
- /*
- But use as source device on which LAST ARRIVED
- segment was received. And do not use fq->dev
- pointer directly, device might already disappeared.
+ /* But use as source device on which LAST ARRIVED
+ * segment was received. And do not use fq->dev
+ * pointer directly, device might already disappeared.
*/
fq->q.fragments->dev = dev;
icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
@@ -192,7 +192,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.dst = dst;
arg.ecn = ecn;
- read_lock(&ip6_frags.lock);
hash = inet6_hash_frag(id, src, dst);
q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
@@ -212,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct net *net = dev_net(skb_dst(skb)->dev);
u8 ecn;
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -243,9 +242,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
+ ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
goto err;
- fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -263,7 +262,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & INET_FRAG_LAST_IN)
+ if (fq->q.flags & INET_FRAG_LAST_IN)
goto err;
fq->q.len = end;
}
@@ -289,7 +288,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
goto found;
}
prev = NULL;
- for(next = fq->q.fragments; next != NULL; next = next->next) {
+ for (next = fq->q.fragments; next != NULL; next = next->next) {
if (FRAG6_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -338,10 +337,10 @@ found:
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= INET_FRAG_FIRST_IN;
+ fq->q.flags |= INET_FRAG_FIRST_IN;
}
- if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
int res;
unsigned long orefdst = skb->_skb_refdst;
@@ -353,14 +352,13 @@ found:
}
skb_dst_drop(skb);
- inet_frag_lru_move(&fq->q);
return -1;
discard_fq:
inet_frag_kill(&fq->q, &ip6_frags);
err:
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
}
@@ -523,7 +521,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
- int evicted;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -531,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
- if (hdr->payload_len==0)
+ if (hdr->payload_len == 0)
goto fail_hdr;
if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
@@ -552,11 +549,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
- if (evicted)
- IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS, evicted);
-
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
ip6_frag_ecn(hdr));
if (fq != NULL) {
@@ -576,32 +568,37 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return -1;
fail_hdr:
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
}
-static const struct inet6_protocol frag_protocol =
-{
+static const struct inet6_protocol frag_protocol = {
.handler = ipv6_frag_rcv,
.flags = INET6_PROTO_NOPOLICY,
};
#ifdef CONFIG_SYSCTL
+static int zero;
+
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ipv6.frags.high_thresh
},
{
.procname = "ip6frag_time",
@@ -613,10 +610,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
{ }
};
+/* secret interval has been deprecated */
+static int ip6_frags_secret_interval_unused;
static struct ctl_table ip6_frags_ctl_table[] = {
{
.procname = "ip6frag_secret_interval",
- .data = &ip6_frags.secret_interval,
+ .data = &ip6_frags_secret_interval_unused,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -636,7 +635,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
table[0].data = &net->ipv6.frags.high_thresh;
+ table[0].extra1 = &net->ipv6.frags.low_thresh;
+ table[0].extra2 = &init_net.ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
+ table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
/* Don't export sysctls to unprivileged users */
@@ -746,8 +748,10 @@ int __init ipv6_frag_init(void)
ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.match = ip6_frag_match;
ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&ip6_frags);
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ret = inet_frags_init(&ip6_frags);
+ if (ret)
+ goto err_pernet;
out:
return ret;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f23fbd28a501..a318dd89b6d9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -314,7 +314,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
- rt->rt6i_genid = rt_genid_ipv6(net);
INIT_LIST_HEAD(&rt->rt6i_siblings);
}
return rt;
@@ -813,7 +812,7 @@ out:
}
-struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
+struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
int flags)
{
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
@@ -843,7 +842,6 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
return NULL;
}
-
EXPORT_SYMBOL(rt6_lookup);
/* ip6_ins_rt is called with FREE table->tb6_lock.
@@ -1024,7 +1022,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
+struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
int flags = 0;
@@ -1041,7 +1039,6 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
-
EXPORT_SYMBOL(ip6_route_output);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -1098,9 +1095,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
- return NULL;
-
if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
return NULL;
@@ -1149,7 +1143,7 @@ static void ip6_link_failure(struct sk_buff *skb)
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
- struct rt6_info *rt6 = (struct rt6_info*)dst;
+ struct rt6_info *rt6 = (struct rt6_info *)dst;
dst_confirm(dst);
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
@@ -1924,7 +1918,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
return NULL;
read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
+ fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
if (!fn)
goto out;
@@ -1983,7 +1977,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
return NULL;
read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
+ for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -2068,7 +2062,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
struct in6_rtmsg rtmsg;
int err;
- switch(cmd) {
+ switch (cmd) {
case SIOCADDRT: /* Add a route */
case SIOCDELRT: /* Delete a route */
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2191,7 +2185,7 @@ int ip6_route_get_saddr(struct net *net,
unsigned int prefs,
struct in6_addr *saddr)
{
- struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
+ struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
int err = 0;
if (rt->rt6i_prefsrc.plen)
*saddr = rt->rt6i_prefsrc.addr;
@@ -2486,7 +2480,7 @@ beginning:
return last_err;
}
-static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct fib6_config cfg;
int err;
@@ -2501,7 +2495,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
return ip6_route_del(&cfg);
}
-static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct fib6_config cfg;
int err;
@@ -2693,7 +2687,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
prefix, 0, NLM_F_MULTI);
}
-static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4f408176dc64..6eab37cf5345 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
@@ -250,7 +250,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
else
strcpy(name, "sit%d");
- dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ipip6_tunnel_setup);
if (dev == NULL)
return NULL;
@@ -811,9 +812,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
const struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
- struct rtable *rt; /* Route to the other host */
- struct net_device *tdev; /* Device to other host */
- unsigned int max_headroom; /* The extra header space needed */
+ struct rtable *rt; /* Route to the other host */
+ struct net_device *tdev; /* Device to other host */
+ unsigned int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr;
struct flowi4 fl4;
int mtu;
@@ -821,6 +822,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
int addr_type;
u8 ttl;
int err;
+ u8 protocol = IPPROTO_IPV6;
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
@@ -910,8 +913,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb)) {
+ ip_rt_put(rt);
+ goto out;
+ }
+
if (df) {
- mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ mtu = dst_mtu(&rt->dst) - t_hlen;
if (mtu < 68) {
dev->stats.collisions++;
@@ -946,7 +955,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
- max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
+ max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -968,14 +977,15 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
- if (IS_ERR(skb)) {
+ if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
ip_rt_put(rt);
- goto out;
+ goto tx_error;
}
+ skb_set_inner_ipproto(skb, IPPROTO_IPV6);
+
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
- IPPROTO_IPV6, tos, ttl, df,
+ protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -998,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK;
out:
@@ -1058,8 +1070,10 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev) {
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
- dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
@@ -1122,7 +1136,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
#endif
static int
-ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip_tunnel_parm p;
@@ -1306,7 +1320,10 @@ done:
static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
@@ -1337,14 +1354,17 @@ static void ipip6_dev_free(struct net_device *dev)
static void ipip6_tunnel_setup(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->netdev_ops = &ipip6_netdev_ops;
- dev->destructor = ipip6_dev_free;
+ dev->destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
dev->flags = IFF_NOARP;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_LLTX;
@@ -1465,6 +1485,40 @@ static void ipip6_netlink_parms(struct nlattr *data[],
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipip6_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
#ifdef CONFIG_IPV6_SIT_6RD
/* This function returns true when 6RD attributes are present in the nl msg */
static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
@@ -1508,12 +1562,20 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
{
struct net *net = dev_net(dev);
struct ip_tunnel *nt;
+ struct ip_tunnel_encap ipencap;
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
int err;
nt = netdev_priv(dev);
+
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(nt, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &nt->parms);
if (ipip6_tunnel_locate(net, &nt->parms, 0))
@@ -1536,15 +1598,23 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
+ int err;
if (dev == sitn->fb_tunnel_dev)
return -EINVAL;
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(t, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &p);
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
@@ -1598,6 +1668,14 @@ static size_t ipip6_get_size(const struct net_device *dev)
/* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
nla_total_size(2) +
#endif
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -1629,6 +1707,16 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
#endif
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1650,6 +1738,10 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
[IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
#endif
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static void ipip6_dellink(struct net_device *dev, struct list_head *head)
@@ -1729,6 +1821,7 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[3] = sitn->tunnels_r_l;
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
+ NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
if (!sitn->fb_tunnel_dev) {
err = -ENOMEM;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index a822b880689b..9a2838e93cc5 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -24,7 +24,7 @@
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
-static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS];
+static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
/* RFC 2460, Section 8.3:
* [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
@@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops);
if (!req)
goto out;
@@ -203,7 +203,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_num = ntohs(th->dest);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (ipv6_opt_accepted(sk, skb) ||
+ if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 058f3eca2e53..c5c10fafcfe2 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,6 +16,8 @@
#include <net/addrconf.h>
#include <net/inet_frag.h>
+static int one = 1;
+
static struct ctl_table ipv6_table_template[] = {
{
.procname = "bindv6only",
@@ -39,6 +41,13 @@ static struct ctl_table ipv6_table_template[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "auto_flowlabels",
+ .data = &init_net.ipv6.sysctl.auto_flowlabels,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "fwmark_reflect",
.data = &init_net.ipv6.sysctl.fwmark_reflect,
.maxlen = sizeof(int),
@@ -56,6 +65,14 @@ static struct ctl_table ipv6_rotable[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "mld_qrv",
+ .data = &sysctl_mld_qrv,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
@@ -74,6 +91,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
+ ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
+ ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 229239ad96b1..cf2e45ab2fa4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -59,7 +59,6 @@
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
-#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
@@ -93,13 +92,16 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- const struct rt6_info *rt = (const struct rt6_info *)dst;
- dst_hold(dst);
- sk->sk_rx_dst = dst;
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- if (rt->rt6i_node)
- inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+ if (dst) {
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ if (rt->rt6i_node)
+ inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+ }
}
static void tcp_v6_hash(struct sock *sk)
@@ -198,6 +200,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
+ ip6_set_txhash(sk);
+
/*
* TCP over IPv4
*/
@@ -470,13 +474,14 @@ out:
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
- struct flowi6 *fl6,
+ struct flowi *fl,
struct request_sock *req,
u16 queue_mapping,
struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
int err = -ENOMEM;
@@ -503,18 +508,6 @@ done:
return err;
}
-static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
-{
- struct flowi6 fl6;
- int res;
-
- res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
- if (!res) {
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
- }
- return res;
-}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
@@ -676,7 +669,8 @@ clear_hash_noput:
return 1;
}
-static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+static int __tcp_v6_inbound_md5_hash(struct sock *sk,
+ const struct sk_buff *skb)
{
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
@@ -716,24 +710,81 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
}
return 0;
}
+
+static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __tcp_v6_inbound_md5_hash(sk, skb);
+ rcu_read_unlock();
+
+ return ret;
+}
+
#endif
+static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+
+ ireq->ir_iif = sk->sk_bound_dev_if;
+
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
+
+ if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
+ (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
+ np->rxopt.bits.rxinfo ||
+ np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
+ np->rxopt.bits.rxohlim || np->repflow)) {
+ atomic_inc(&skb->users);
+ ireq->pktopts = skb;
+ }
+}
+
+static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl,
+ const struct request_sock *req,
+ bool *strict)
+{
+ if (strict)
+ *strict = true;
+ return inet6_csk_route_req(sk, &fl->u.ip6, req);
+}
+
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
- .rtx_syn_ack = tcp_v6_rtx_synack,
+ .rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+ .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
+ sizeof(struct ipv6hdr),
+#ifdef CONFIG_TCP_MD5SIG
.md5_lookup = tcp_v6_reqsk_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
-};
#endif
+ .init_req = tcp_v6_init_req,
+#ifdef CONFIG_SYN_COOKIES
+ .cookie_init_seq = cookie_v6_init_sequence,
+#endif
+ .route_req = tcp_v6_route_req,
+ .init_seq = tcp_v6_init_sequence,
+ .send_synack = tcp_v6_send_synack,
+ .queue_hash_add = inet6_csk_reqsk_queue_hash_add,
+};
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
u32 tsval, u32 tsecr, int oif,
@@ -973,153 +1024,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-/* FIXME: this is substantially similar to the ipv4 code.
- * Can some kind of merge be done? -- erics
- */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_options_received tmp_opt;
- struct request_sock *req;
- struct inet_request_sock *ireq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- __u32 isn = TCP_SKB_CB(skb)->when;
- struct dst_entry *dst = NULL;
- struct tcp_fastopen_cookie foc = { .len = -1 };
- bool want_cookie = false, fastopen;
- struct flowi6 fl6;
- int err;
-
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
if (!ipv6_unicast_destination(skb))
goto drop;
- if ((sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
- if (!want_cookie)
- goto drop;
- }
+ return tcp_conn_request(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
- goto drop;
- }
-
- req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
- if (req == NULL)
- goto drop;
-
-#ifdef CONFIG_TCP_MD5SIG
- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
-#endif
-
- tcp_clear_options(&tmp_opt);
- tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
- tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
-
- if (want_cookie && !tmp_opt.saw_tstamp)
- tcp_clear_options(&tmp_opt);
-
- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
- tcp_openreq_init(req, &tmp_opt, skb);
-
- ireq = inet_rsk(req);
- ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
- ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, skb, sock_net(sk));
-
- ireq->ir_iif = sk->sk_bound_dev_if;
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- /* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
-
- if (!isn) {
- if (ipv6_opt_accepted(sk, skb) ||
- np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
- np->repflow) {
- atomic_inc(&skb->users);
- ireq->pktopts = skb;
- }
-
- if (want_cookie) {
- isn = cookie_v6_init_sequence(sk, skb, &req->mss);
- req->cookie_ts = tmp_opt.tstamp_ok;
- goto have_isn;
- }
-
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (tmp_opt.saw_tstamp &&
- tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
- if (!tcp_peer_is_proven(req, dst, true)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
- /* Kill the following clause, if you dislike this way. */
- else if (!sysctl_tcp_syncookies &&
- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false)) {
- /* Without syncookies last quarter of
- * backlog is filled with destinations,
- * proven to be alive.
- * It means that we continue to communicate
- * to destinations, already remembered
- * to the moment of synflood.
- */
- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
- &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
- goto drop_and_release;
- }
-
- isn = tcp_v6_init_sequence(skb);
- }
-have_isn:
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_release;
-
- if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
- goto drop_and_free;
-
- tcp_rsk(req)->snt_isn = isn;
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_openreq_init_rwin(req, sk, dst);
- fastopen = !want_cookie &&
- tcp_try_fastopen(sk, skb, req, &foc, dst);
- err = tcp_v6_send_synack(sk, dst, &fl6, req,
- skb_get_queue_mapping(skb), &foc);
- if (!fastopen) {
- if (err || want_cookie)
- goto drop_and_free;
-
- tcp_rsk(req)->listener = NULL;
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- }
- return 0;
-
-drop_and_release:
- dst_release(dst);
-drop_and_free:
- reqsk_free(req);
drop:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0; /* don't send reset */
@@ -1235,6 +1150,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
+ ip6_set_txhash(newsk);
+
/* Now IPv6 options...
First: no IPv4 options.
@@ -1346,11 +1263,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
-#ifdef CONFIG_TCP_MD5SIG
- if (tcp_v6_inbound_md5_hash(sk, skb))
- goto discard;
-#endif
-
if (sk_filter(sk, skb))
goto discard;
@@ -1455,7 +1367,7 @@ ipv6_pktoptions:
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
- if (ipv6_opt_accepted(sk, opt_skb)) {
+ if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
@@ -1499,11 +1411,19 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
+ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+ * barrier() makes sure compiler wont play fool^Waliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+ sizeof(struct inet6_skb_parm));
+ barrier();
+
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
- TCP_SKB_CB(skb)->when = 0;
+ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
@@ -1523,6 +1443,11 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
+#ifdef CONFIG_TCP_MD5SIG
+ if (tcp_v6_inbound_md5_hash(sk, skb))
+ goto discard_and_relse;
+#endif
+
if (sk_filter(sk, skb))
goto discard_and_relse;
@@ -1532,18 +1457,8 @@ process:
bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
- struct tcp_sock *tp = tcp_sk(sk);
- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- tp->ucopy.dma_chan = net_dma_find_channel();
- if (tp->ucopy.dma_chan)
+ if (!tcp_prequeue(sk, skb))
ret = tcp_v6_do_rcv(sk, skb);
- else
-#endif
- {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v6_do_rcv(sk, skb);
- }
} else if (unlikely(sk_add_backlog(sk, skb,
sk->sk_rcvbuf + sk->sk_sndbuf))) {
bh_unlock_sock(sk);
@@ -1681,6 +1596,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v6_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1711,6 +1627,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1950,7 +1867,6 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 01b0ff9a0c2c..c1ab77105b4c 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -15,54 +15,17 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static int tcp_v6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
- return 0;
-}
-
static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
- const struct ipv6hdr *iph = skb_gro_network_header(skb);
- __wsum wsum;
-
/* Don't bother verifying checksum if we're going to flush anyway. */
- if (NAPI_GRO_CB(skb)->flush)
- goto skip_csum;
-
- wsum = NAPI_GRO_CB(skb)->csum;
-
- switch (skb->ip_summed) {
- case CHECKSUM_NONE:
- wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb),
- wsum);
-
- /* fall through */
-
- case CHECKSUM_COMPLETE:
- if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
- wsum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-
+ if (!NAPI_GRO_CB(skb)->flush &&
+ skb_gro_checksum_validate(skb, IPPROTO_TCP,
+ ip6_gro_compute_pseudo)) {
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
-skip_csum:
return tcp_gro_receive(head, skb);
}
@@ -78,10 +41,32 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
return tcp_gro_complete(skb);
}
+struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return ERR_PTR(-EINVAL);
+
+ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ /* Set up pseudo header, usually expect stack to have done
+ * this.
+ */
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
+ }
+
+ return tcp_gso_segment(skb, features);
+}
static const struct net_offload tcpv6_offload = {
.callbacks = {
- .gso_send_check = tcp_v6_gso_send_check,
- .gso_segment = tcp_gso_segment,
+ .gso_segment = tcp6_gso_segment,
.gro_receive = tcp6_gro_receive,
.gro_complete = tcp6_gro_complete,
},
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 2c4e4c5c7614..3c758007b327 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -15,7 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors Mitsuru KANDA <mk@linux-ipv6.org>
- * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -64,7 +64,6 @@ err:
return ret;
}
-
EXPORT_SYMBOL(xfrm6_tunnel_register);
int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
@@ -92,7 +91,6 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
return ret;
}
-
EXPORT_SYMBOL(xfrm6_tunnel_deregister);
#define for_each_tunnel_rcu(head, handler) \
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7092ff78fd84..f6ba535b6feb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net,
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 1;
if (addr_type == IPV6_ADDR_ANY &&
- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
if (sk2_rcv_saddr6 &&
@@ -244,7 +243,7 @@ begin:
goto exact_match;
} else if (score == badness && reuseport) {
matches++;
- if (((u64)hash * matches) >> 32 == 0)
+ if (reciprocal_scale(hash, matches) == 0)
result = sk;
hash = next_pseudo_random32(hash);
}
@@ -324,7 +323,7 @@ begin:
}
} else if (score == badness && reuseport) {
matches++;
- if (((u64)hash * matches) >> 32 == 0)
+ if (reciprocal_scale(hash, matches) == 0)
result = sk;
hash = next_pseudo_random32(hash);
}
@@ -374,8 +373,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup);
/*
- * This should be easy, if there is something there we
- * return it, otherwise we block.
+ * This should be easy, if there is something there we
+ * return it, otherwise we block.
*/
int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
@@ -473,7 +472,7 @@ try_again:
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_scope_id =
ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
}
*addr_len = sizeof(*sin6);
}
@@ -531,14 +530,18 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct in6_addr *saddr = &hdr->saddr;
const struct in6_addr *daddr = &hdr->daddr;
- struct udphdr *uh = (struct udphdr*)(skb->data+offset);
+ struct udphdr *uh = (struct udphdr *)(skb->data+offset);
struct sock *sk;
int err;
+ struct net *net = dev_net(skb->dev);
- sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
+ sk = __udp6_lib_lookup(net, daddr, uh->dest,
saddr, uh->source, inet6_iif(skb), udptable);
- if (sk == NULL)
+ if (sk == NULL) {
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
+ }
if (type == ICMPV6_PKT_TOOBIG) {
if (!ip6_sk_accept_pmtu(sk))
@@ -593,7 +596,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
static __inline__ void udpv6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt, u8 type,
- u8 code, int offset, __be32 info )
+ u8 code, int offset, __be32 info)
{
__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
@@ -674,7 +677,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
@@ -703,43 +706,26 @@ drop:
return -1;
}
-static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
- __be16 loc_port, const struct in6_addr *loc_addr,
- __be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif)
+static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif, unsigned short hnum)
{
- struct hlist_nulls_node *node;
- unsigned short num = ntohs(loc_port);
-
- sk_nulls_for_each_from(sk, node) {
- struct inet_sock *inet = inet_sk(sk);
-
- if (!net_eq(sock_net(sk), net))
- continue;
-
- if (udp_sk(sk)->udp_port_hash == num &&
- sk->sk_family == PF_INET6) {
- if (inet->inet_dport) {
- if (inet->inet_dport != rmt_port)
- continue;
- }
- if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
- !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
- continue;
-
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
- continue;
+ struct inet_sock *inet = inet_sk(sk);
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
- continue;
- }
- if (!inet6_mc_check(sk, loc_addr, rmt_addr))
- continue;
- return sk;
- }
- }
- return NULL;
+ if (!net_eq(sock_net(sk), net))
+ return false;
+
+ if (udp_sk(sk)->udp_port_hash != hnum ||
+ sk->sk_family != PF_INET6 ||
+ (inet->inet_dport && inet->inet_dport != rmt_port) ||
+ (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ return false;
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
+ return false;
+ return true;
}
static void flush_stack(struct sock **stack, unsigned int count,
@@ -763,6 +749,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
skb1 = NULL;
+ sock_put(sk);
}
if (unlikely(skb1))
kfree_skb(skb1);
@@ -788,43 +775,51 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
const struct udphdr *uh = udp_hdr(skb);
- struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
- int dif;
- unsigned int i, count = 0;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(uh->dest);
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
+ int dif = inet6_iif(skb);
+ unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+
+ if (use_hash2) {
+ hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+ udp_table.mask;
+ hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+start_lookup:
+ hslot = &udp_table.hash2[hash2];
+ offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
+ }
spin_lock(&hslot->lock);
- sk = sk_nulls_head(&hslot->head);
- dif = inet6_iif(skb);
- sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
- while (sk) {
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- if (uh->check || udp_sk(sk)->no_check6_rx)
+ sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
+ if (__udp_v6_is_mcast_sock(net, sk,
+ uh->dest, daddr,
+ uh->source, saddr,
+ dif, hnum) &&
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ (uh->check || udp_sk(sk)->no_check6_rx)) {
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
+ }
stack[count++] = sk;
-
- sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
- if (unlikely(count == ARRAY_SIZE(stack))) {
- if (!sk)
- break;
- flush_stack(stack, count, skb, ~0);
- count = 0;
+ sock_hold(sk);
}
}
- /*
- * before releasing the lock, we must take reference on sockets
- */
- for (i = 0; i < count; i++)
- sock_hold(stack[i]);
spin_unlock(&hslot->lock);
+ /* Also lookup *:port if we are using hash2 and haven't done so yet. */
+ if (use_hash2 && hash2 != hash2_any) {
+ hash2 = hash2_any;
+ goto start_lookup;
+ }
+
if (count) {
flush_stack(stack, count, skb, count - 1);
-
- for (i = 0; i < count; i++)
- sock_put(stack[i]);
} else {
kfree_skb(skb);
}
@@ -896,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto csum_error;
}
+ if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
+ skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ ip6_compute_pseudo);
+
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -965,10 +964,10 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
}
/**
- * udp6_hwcsum_outgoing - handle outgoing HW checksumming
- * @sk: socket we are sending on
- * @skb: sk_buff containing the filled-in UDP header
- * (checksum field must be zeroed out)
+ * udp6_hwcsum_outgoing - handle outgoing HW checksumming
+ * @sk: socket we are sending on
+ * @skb: sk_buff containing the filled-in UDP header
+ * (checksum field must be zeroed out)
*/
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
const struct in6_addr *saddr,
@@ -1299,7 +1298,7 @@ do_append_data:
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
- (struct rt6_info*)dst,
+ (struct rt6_info *)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
if (err)
udp_v6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 0ae3d98f83e0..6b8f543f6ac6 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -10,34 +10,13 @@
* UDPv6 GSO support
*/
#include <linux/skbuff.h>
+#include <linux/netdevice.h>
#include <net/protocol.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static int udp6_ufo_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(*uh)))
- return -EINVAL;
-
- if (likely(!skb->encapsulation)) {
- ipv6h = ipv6_hdr(skb);
- uh = udp_hdr(skb);
-
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
- }
-
- return 0;
-}
-
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -48,7 +27,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
u8 *packet_start, *prevhdr;
u8 nexthdr;
u8 frag_hdr_sz = sizeof(struct frag_hdr);
- int offset;
__wsum csum;
int tnl_hlen;
@@ -80,15 +58,29 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
- segs = skb_udp_tunnel_segment(skb, features);
+ segs = skb_udp_tunnel_segment(skb, features, true);
else {
+ const struct ipv6hdr *ipv6h;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto out;
+
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
* do checksum of UDP packets sent as multiple IP fragments.
*/
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+
+ uh = udp_hdr(skb);
+ ipv6h = ipv6_hdr(skb);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
+ &ipv6h->daddr, csum);
+
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
skb->ip_summed = CHECKSUM_NONE;
/* Check if there is enough headroom to insert fragment header. */
@@ -127,10 +119,52 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
out:
return segs;
}
+
+static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_gro_udphdr(skb);
+
+ if (unlikely(!uh))
+ goto flush;
+
+ /* Don't bother verifying checksum if we're going to flush anyway. */
+ if (NAPI_GRO_CB(skb)->flush)
+ goto skip;
+
+ if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
+ ip6_gro_compute_pseudo))
+ goto flush;
+ else if (uh->check)
+ skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ ip6_gro_compute_pseudo);
+
+skip:
+ NAPI_GRO_CB(skb)->is_ipv6 = 1;
+ return udp_gro_receive(head, skb, uh);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+}
+
+static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+
+ if (uh->check)
+ uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
+ &ipv6h->daddr, 0);
+
+ return udp_gro_complete(skb, nhoff);
+}
+
static const struct net_offload udpv6_offload = {
.callbacks = {
- .gso_send_check = udp6_ufo_send_check,
.gso_segment = udp6_ufo_fragment,
+ .gro_receive = udp6_gro_receive,
+ .gro_complete = udp6_gro_complete,
},
};
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index f8c3cf842f53..f48fbe4d16f5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -3,8 +3,8 @@
*
* Authors:
* Mitsuru KANDA @USAGI
- * Kazunori MIYAZAWA @USAGI
- * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * Kazunori MIYAZAWA @USAGI
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
* YOSHIFUJI Hideaki @USAGI
* IPv6 support
*/
@@ -52,7 +52,6 @@ int xfrm6_rcv(struct sk_buff *skb)
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
0);
}
-
EXPORT_SYMBOL(xfrm6_rcv);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
@@ -142,5 +141,4 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
drop:
return -1;
}
-
EXPORT_SYMBOL(xfrm6_input_addr);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 433672d07d0b..ca3f29b98ae5 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -25,7 +25,6 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
{
return ip6_find_1stfragopt(skb, prevhdr);
}
-
EXPORT_SYMBOL(xfrm6_find_1stfragopt);
static int xfrm6_local_dontfrag(struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 2a0bbda2c76a..ac49f84fe2c3 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -3,11 +3,11 @@
*
* Authors:
* Mitsuru KANDA @USAGI
- * Kazunori MIYAZAWA @USAGI
- * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
- * IPv6 support
- * YOSHIFUJI Hideaki
- * Split up af-specific portion
+ * Kazunori MIYAZAWA @USAGI
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * IPv6 support
+ * YOSHIFUJI Hideaki
+ * Split up af-specific portion
*
*/
@@ -84,7 +84,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
if (dst->ops->family == AF_INET6) {
- struct rt6_info *rt = (struct rt6_info*)dst;
+ struct rt6_info *rt = (struct rt6_info *)dst;
if (rt->rt6i_node)
path->path_cookie = rt->rt6i_node->fn_sernum;
}
@@ -97,7 +97,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rt6_info *rt = (struct rt6_info*)xdst->route;
+ struct rt6_info *rt = (struct rt6_info *)xdst->route;
xdst->u.dst.dev = dev;
dev_hold(dev);
@@ -296,7 +296,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
.dst_ops = &xfrm6_dst_ops,
.dst_lookup = xfrm6_dst_lookup,
- .get_saddr = xfrm6_get_saddr,
+ .get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
.init_dst = xfrm6_init_dst,
@@ -319,9 +319,9 @@ static void xfrm6_policy_fini(void)
static struct ctl_table xfrm6_policy_table[] = {
{
.procname = "xfrm6_gc_thresh",
- .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
+ .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 3fc970135fc6..8a1f9c0d2a13 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -3,11 +3,11 @@
*
* Authors:
* Mitsuru KANDA @USAGI
- * Kazunori MIYAZAWA @USAGI
- * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
- * IPv6 support
- * YOSHIFUJI Hideaki @USAGI
- * Split up af-specific portion
+ * Kazunori MIYAZAWA @USAGI
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ * IPv6 support
+ * YOSHIFUJI Hideaki @USAGI
+ * Split up af-specific portion
*
*/
@@ -45,10 +45,10 @@ xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
const xfrm_address_t *daddr, const xfrm_address_t *saddr)
{
x->id = tmpl->id;
- if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
+ if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
- if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
+ if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
x->props.mode = tmpl->mode;
x->props.reqid = tmpl->reqid;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 1c66465a42dd..5743044cd660 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -15,7 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors Mitsuru KANDA <mk@linux-ipv6.org>
- * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
*
* Based on net/ipv4/xfrm4_tunnel.c
*
@@ -110,7 +110,6 @@ __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr)
rcu_read_unlock_bh();
return htonl(spi);
}
-
EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi)
@@ -187,7 +186,6 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr)
return htonl(spi);
}
-
EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
static void x6spi_destroy_rcu(struct rcu_head *head)
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 54747c25c86c..92fafd485deb 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -674,7 +674,6 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
self->daddr = DEV_ADDR_ANY;
kfree(discoveries);
return -EHOSTUNREACH;
- break;
}
}
/* Cleanup our copy of the discovery log */
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 2ba8b9705bb7..61ceb4cdb4a2 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -320,8 +320,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
__FILE__, __LINE__, tty->driver->name, port->count);
spin_lock_irqsave(&port->lock, flags);
- if (!tty_hung_up_p(filp))
- port->count--;
+ port->count--;
port->blocked_open++;
spin_unlock_irqrestore(&port->lock, flags);
@@ -458,8 +457,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
/*
* If the port is the middle of closing, bail out now
*/
- if (tty_hung_up_p(filp) ||
- test_bit(ASYNCB_CLOSING, &self->port.flags)) {
+ if (test_bit(ASYNCB_CLOSING, &self->port.flags)) {
/* Hm, why are we blocking on ASYNC_CLOSING if we
* do return -EAGAIN/-ERESTARTSYS below anyway?
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 365b895da84b..9e0d909390fd 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -293,7 +293,8 @@ static void irda_device_setup(struct net_device *dev)
*/
struct net_device *alloc_irdadev(int sizeof_priv)
{
- return alloc_netdev(sizeof_priv, "irda%d", irda_device_setup);
+ return alloc_netdev(sizeof_priv, "irda%d", NET_NAME_UNKNOWN,
+ irda_device_setup);
}
EXPORT_SYMBOL(alloc_irdadev);
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index 7ac4d1becbfc..5a2d0a695529 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -98,7 +98,7 @@ static const struct file_operations irlan_fops = {
extern struct proc_dir_entry *proc_irda;
#endif /* CONFIG_PROC_FS */
-static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr);
+static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr);
static void __irlan_close(struct irlan_cb *self);
static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
__u8 value_byte, __u16 value_short,
@@ -196,7 +196,7 @@ static void __exit irlan_cleanup(void)
* Open new instance of a client/provider, we should only register the
* network device if this instance is ment for a particular client/provider
*/
-static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr)
+static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr)
{
struct net_device *dev;
struct irlan_cb *self;
@@ -1024,7 +1024,6 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
default:
IRDA_DEBUG(2, "%s(), Unknown parameter type!\n", __func__ );
return 0;
- break;
}
/* Insert at end of sk-buffer */
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index ffcec225b5d9..dc13f1a45f2f 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -96,7 +96,7 @@ static void irlan_eth_setup(struct net_device *dev)
*/
struct net_device *alloc_irlandev(const char *name)
{
- return alloc_netdev(sizeof(struct irlan_cb), name,
+ return alloc_netdev(sizeof(struct irlan_cb), name, NET_NAME_UNKNOWN,
irlan_eth_setup);
}
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 9ea0c933b9ff..a37998c6273d 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -622,7 +622,7 @@ void irlap_send_rd_frame(struct irlap_cb *self)
frame = (struct rd_frame *)skb_put(tx_skb, 2);
frame->caddr = self->caddr;
- frame->caddr = RD_RSP | PF_BIT;
+ frame->control = RD_RSP | PF_BIT;
irlap_queue_xmit(self, tx_skb);
}
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 98ad6ec4bd3c..a5f28d421ea8 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1426,7 +1426,8 @@ __u8 *irlmp_hint_to_service(__u8 *hint)
if (hint[1] & HINT_TELEPHONY) {
IRDA_DEBUG(1, "Telephony ");
service[i++] = S_TELEPHONY;
- } if (hint[1] & HINT_FILE_SERVER)
+ }
+ if (hint[1] & HINT_FILE_SERVER)
IRDA_DEBUG(1, "File Server ");
if (hint[1] & HINT_COMM) {
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 7a95fa4a3de1..a089b6b91650 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1103,7 +1103,6 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
default:
err = -EINVAL;
goto out;
- break;
}
}
@@ -1543,7 +1542,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
sk->sk_shutdown |= how;
if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) {
- if (iucv->transport == AF_IUCV_TRANS_IUCV) {
+ if ((iucv->transport == AF_IUCV_TRANS_IUCV) &&
+ iucv->path) {
err = pr_iucv->path_quiesce(iucv->path, NULL);
if (err)
err = -ENOTCONN;
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index da787930df0a..2a6a1fdd62c0 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -493,8 +493,8 @@ static void iucv_declare_cpu(void *data)
err = "Paging or storage error";
break;
}
- pr_warning("Defining an interrupt buffer on CPU %i"
- " failed with 0x%02x (%s)\n", cpu, rc, err);
+ pr_warn("Defining an interrupt buffer on CPU %i failed with 0x%02x (%s)\n",
+ cpu, rc, err);
return;
}
@@ -1831,7 +1831,7 @@ static void iucv_external_interrupt(struct ext_code ext_code,
BUG_ON(p->iptype < 0x01 || p->iptype > 0x09);
work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC);
if (!work) {
- pr_warning("iucv_external_interrupt: out of memory\n");
+ pr_warn("iucv_external_interrupt: out of memory\n");
return;
}
memcpy(&work->data, p, sizeof(work->data));
@@ -1974,8 +1974,7 @@ static int iucv_pm_restore(struct device *dev)
printk(KERN_WARNING "iucv_pm_restore %p\n", iucv_path_table);
#endif
if ((iucv_pm_state != IUCV_PM_RESTORING) && iucv_path_table)
- pr_warning("Suspending Linux did not completely close all IUCV "
- "connections\n");
+ pr_warn("Suspending Linux did not completely close all IUCV connections\n");
iucv_pm_state = IUCV_PM_RESTORING;
if (cpumask_empty(&iucv_irq_cpumask)) {
rc = iucv_query_maxconn();
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ba2a2f95911c..1847ec4e3930 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -405,7 +405,6 @@ static int verify_address_len(const void *p)
* XXX When it can, remove this -EINVAL. -DaveM
*/
return -EINVAL;
- break;
}
return 0;
@@ -536,7 +535,6 @@ pfkey_satype2proto(uint8_t satype)
return IPPROTO_ESP;
case SADB_X_SATYPE_IPCOMP:
return IPPROTO_COMP;
- break;
default:
return 0;
}
@@ -553,7 +551,6 @@ pfkey_proto2satype(uint16_t proto)
return SADB_SATYPE_ESP;
case IPPROTO_COMP:
return SADB_X_SATYPE_IPCOMP;
- break;
default:
return 0;
}
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index adb9843dd7cf..378c73b26093 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -6,6 +6,7 @@ menuconfig L2TP
tristate "Layer Two Tunneling Protocol (L2TP)"
depends on (IPV6 || IPV6=n)
depends on INET
+ select NET_UDP_TUNNEL
---help---
Layer Two Tunneling Protocol
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index bea259043205..895348e44c7d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -52,6 +52,7 @@
#include <net/dst.h>
#include <net/ip.h>
#include <net/udp.h>
+#include <net/udp_tunnel.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
#include <net/protocol.h>
@@ -147,7 +148,7 @@ do { \
atomic_read(&_t->ref_count)); \
l2tp_tunnel_inc_refcount_1(_t); \
} while (0)
-#define l2tp_tunnel_dec_refcount(_t)
+#define l2tp_tunnel_dec_refcount(_t) \
do { \
pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \
__func__, __LINE__, (_t)->name, \
@@ -1358,81 +1359,46 @@ static int l2tp_tunnel_sock_create(struct net *net,
{
int err = -EINVAL;
struct socket *sock = NULL;
- struct sockaddr_in udp_addr = {0};
- struct sockaddr_l2tpip ip_addr = {0};
-#if IS_ENABLED(CONFIG_IPV6)
- struct sockaddr_in6 udp6_addr = {0};
- struct sockaddr_l2tpip6 ip6_addr = {0};
-#endif
+ struct udp_port_cfg udp_conf;
switch (cfg->encap) {
case L2TP_ENCAPTYPE_UDP:
+ memset(&udp_conf, 0, sizeof(udp_conf));
+
#if IS_ENABLED(CONFIG_IPV6)
if (cfg->local_ip6 && cfg->peer_ip6) {
- err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
- if (err < 0)
- goto out;
-
- sk_change_net(sock->sk, net);
-
- udp6_addr.sin6_family = AF_INET6;
- memcpy(&udp6_addr.sin6_addr, cfg->local_ip6,
- sizeof(udp6_addr.sin6_addr));
- udp6_addr.sin6_port = htons(cfg->local_udp_port);
- err = kernel_bind(sock, (struct sockaddr *) &udp6_addr,
- sizeof(udp6_addr));
- if (err < 0)
- goto out;
-
- udp6_addr.sin6_family = AF_INET6;
- memcpy(&udp6_addr.sin6_addr, cfg->peer_ip6,
- sizeof(udp6_addr.sin6_addr));
- udp6_addr.sin6_port = htons(cfg->peer_udp_port);
- err = kernel_connect(sock,
- (struct sockaddr *) &udp6_addr,
- sizeof(udp6_addr), 0);
- if (err < 0)
- goto out;
-
- if (cfg->udp6_zero_tx_checksums)
- udp_set_no_check6_tx(sock->sk, true);
- if (cfg->udp6_zero_rx_checksums)
- udp_set_no_check6_rx(sock->sk, true);
+ udp_conf.family = AF_INET6;
+ memcpy(&udp_conf.local_ip6, cfg->local_ip6,
+ sizeof(udp_conf.local_ip6));
+ memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
+ sizeof(udp_conf.peer_ip6));
+ udp_conf.use_udp6_tx_checksums =
+ cfg->udp6_zero_tx_checksums;
+ udp_conf.use_udp6_rx_checksums =
+ cfg->udp6_zero_rx_checksums;
} else
#endif
{
- err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
- if (err < 0)
- goto out;
-
- sk_change_net(sock->sk, net);
-
- udp_addr.sin_family = AF_INET;
- udp_addr.sin_addr = cfg->local_ip;
- udp_addr.sin_port = htons(cfg->local_udp_port);
- err = kernel_bind(sock, (struct sockaddr *) &udp_addr,
- sizeof(udp_addr));
- if (err < 0)
- goto out;
-
- udp_addr.sin_family = AF_INET;
- udp_addr.sin_addr = cfg->peer_ip;
- udp_addr.sin_port = htons(cfg->peer_udp_port);
- err = kernel_connect(sock,
- (struct sockaddr *) &udp_addr,
- sizeof(udp_addr), 0);
- if (err < 0)
- goto out;
+ udp_conf.family = AF_INET;
+ udp_conf.local_ip = cfg->local_ip;
+ udp_conf.peer_ip = cfg->peer_ip;
+ udp_conf.use_udp_checksums = cfg->use_udp_checksums;
}
- if (!cfg->use_udp_checksums)
- sock->sk->sk_no_check_tx = 1;
+ udp_conf.local_udp_port = htons(cfg->local_udp_port);
+ udp_conf.peer_udp_port = htons(cfg->peer_udp_port);
+
+ err = udp_sock_create(net, &udp_conf, &sock);
+ if (err < 0)
+ goto out;
break;
case L2TP_ENCAPTYPE_IP:
#if IS_ENABLED(CONFIG_IPV6)
if (cfg->local_ip6 && cfg->peer_ip6) {
+ struct sockaddr_l2tpip6 ip6_addr = {0};
+
err = sock_create_kern(AF_INET6, SOCK_DGRAM,
IPPROTO_L2TP, &sock);
if (err < 0)
@@ -1461,6 +1427,8 @@ static int l2tp_tunnel_sock_create(struct net *net,
} else
#endif
{
+ struct sockaddr_l2tpip ip_addr = {0};
+
err = sock_create_kern(AF_INET, SOCK_DGRAM,
IPPROTO_L2TP, &sock);
if (err < 0)
@@ -1614,19 +1582,17 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
- udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
- udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
- udpv6_encap_enable();
- else
-#endif
- udp_encap_enable();
- }
+ struct udp_tunnel_sock_cfg udp_cfg;
+
+ udp_cfg.sk_user_data = tunnel;
+ udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
+ udp_cfg.encap_rcv = l2tp_udp_encap_recv;
+ udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
- sk->sk_user_data = tunnel;
+ setup_udp_tunnel_sock(net, sock, &udp_cfg);
+ } else {
+ sk->sk_user_data = tunnel;
+ }
/* Hook on the tunnel socket destructor so that we can cleanup
* if the tunnel socket goes away.
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 76125c57ee6d..edb78e69efe4 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -246,7 +246,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
goto out;
}
- dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup);
+ dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN,
+ l2tp_eth_dev_setup);
if (!dev) {
rc = -ENOMEM;
goto out_del_session;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index f3f98a156cee..0edb263cc002 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -687,7 +687,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
lsa->l2tp_scope_id = 0;
lsa->l2tp_conn_id = 0;
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
- lsa->l2tp_scope_id = IP6CB(skb)->iif;
+ lsa->l2tp_scope_id = inet6_iif(skb);
}
if (np->rxopt.all)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 13752d96275e..b704a9356208 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -755,7 +755,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
/* If PMTU discovery was enabled, use the MTU that was discovered */
dst = sk_dst_get(tunnel->sock);
if (dst != NULL) {
- u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock));
+ u32 pmtu = dst_mtu(dst);
+
if (pmtu != 0)
session->mtu = session->mru = pmtu -
PPPOL2TP_HEADER_OVERHEAD;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 0080d2b0a8ae..bb9cbc17d926 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -839,7 +839,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!(flags & MSG_PEEK)) {
spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
- sk_eat_skb(sk, skb, false);
+ sk_eat_skb(sk, skb);
spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
*seq = 0;
}
@@ -861,10 +861,10 @@ copy_uaddr:
llc_cmsg_rcv(msg, skb);
if (!(flags & MSG_PEEK)) {
- spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
- sk_eat_skb(sk, skb, false);
- spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
- *seq = 0;
+ spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
+ sk_eat_skb(sk, skb);
+ spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
+ *seq = 0;
}
goto out;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 97b5dcad5025..aeb6a483b3bc 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -19,14 +19,6 @@ if MAC80211 != n
config MAC80211_HAS_RC
bool
-config MAC80211_RC_PID
- bool "PID controller based rate control algorithm" if EXPERT
- select MAC80211_HAS_RC
- ---help---
- This option enables a TX rate control algorithm for
- mac80211 that uses a PID controller to select the TX
- rate.
-
config MAC80211_RC_MINSTREL
bool "Minstrel" if EXPERT
select MAC80211_HAS_RC
@@ -51,14 +43,6 @@ choice
overridden through the ieee80211_default_rc_algo module
parameter if different algorithms are available.
-config MAC80211_RC_DEFAULT_PID
- bool "PID controller based rate control algorithm"
- depends on MAC80211_RC_PID
- ---help---
- Select the PID controller based rate control as the
- default rate control algorithm. You should choose
- this unless you know what you are doing.
-
config MAC80211_RC_DEFAULT_MINSTREL
bool "Minstrel"
depends on MAC80211_RC_MINSTREL
@@ -72,7 +56,6 @@ config MAC80211_RC_DEFAULT
string
default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL && MAC80211_RC_MINSTREL_HT
default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL
- default "pid" if MAC80211_RC_DEFAULT_PID
default ""
endif
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 1e46ffa69167..7273d2796dd1 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -17,6 +17,7 @@ mac80211-y := \
aes_ccm.o \
aes_cmac.o \
cfg.o \
+ ethtool.o \
rx.o \
spectmgmt.o \
tx.o \
@@ -47,17 +48,12 @@ mac80211-$(CONFIG_PM) += pm.o
CFLAGS_trace.o := -I$(src)
-# objects for PID algorithm
-rc80211_pid-y := rc80211_pid_algo.o
-rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
-
rc80211_minstrel-y := rc80211_minstrel.o
rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o
rc80211_minstrel_ht-y := rc80211_minstrel_ht.o
rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o
-mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y)
mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y)
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 31bf2586fb84..a48bad468880 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -52,7 +52,7 @@ static void ieee80211_free_tid_rx(struct rcu_head *h)
del_timer_sync(&tid_rx->reorder_timer);
for (i = 0; i < tid_rx->buf_size; i++)
- dev_kfree_skb(tid_rx->reorder_buf[i]);
+ __skb_queue_purge(&tid_rx->reorder_buf[i]);
kfree(tid_rx->reorder_buf);
kfree(tid_rx->reorder_time);
kfree(tid_rx);
@@ -224,28 +224,15 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
ieee80211_tx_skb(sdata, skb);
}
-void ieee80211_process_addba_request(struct ieee80211_local *local,
- struct sta_info *sta,
- struct ieee80211_mgmt *mgmt,
- size_t len)
+void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy, u16 tid,
+ u16 buf_size, bool tx, bool auto_seq)
{
+ struct ieee80211_local *local = sta->sdata->local;
struct tid_ampdu_rx *tid_agg_rx;
- u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
- u8 dialog_token;
- int ret = -EOPNOTSUPP;
-
- /* extract session parameters from addba request frame */
- dialog_token = mgmt->u.action.u.addba_req.dialog_token;
- timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
- start_seq_num =
- le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
-
- capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
- ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
- tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
- buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
-
- status = WLAN_STATUS_REQUEST_DECLINED;
+ int i, ret = -EOPNOTSUPP;
+ u16 status = WLAN_STATUS_REQUEST_DECLINED;
if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) {
ht_dbg(sta->sdata,
@@ -264,7 +251,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
status = WLAN_STATUS_INVALID_QOS_PARAM;
ht_dbg_ratelimited(sta->sdata,
"AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
- mgmt->sa, tid, ba_policy, buf_size);
+ sta->sta.addr, tid, ba_policy, buf_size);
goto end_no_lock;
}
/* determine default buffer size */
@@ -281,7 +268,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
if (sta->ampdu_mlme.tid_rx[tid]) {
ht_dbg_ratelimited(sta->sdata,
"unexpected AddBA Req from %pM on tid %u\n",
- mgmt->sa, tid);
+ sta->sta.addr, tid);
/* delete existing Rx BA session on the same tid */
___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
@@ -308,7 +295,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
/* prepare reordering buffer */
tid_agg_rx->reorder_buf =
- kcalloc(buf_size, sizeof(struct sk_buff *), GFP_KERNEL);
+ kcalloc(buf_size, sizeof(struct sk_buff_head), GFP_KERNEL);
tid_agg_rx->reorder_time =
kcalloc(buf_size, sizeof(unsigned long), GFP_KERNEL);
if (!tid_agg_rx->reorder_buf || !tid_agg_rx->reorder_time) {
@@ -318,6 +305,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
goto end;
}
+ for (i = 0; i < buf_size; i++)
+ __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]);
+
ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START,
&sta->sta, tid, &start_seq_num, 0);
ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n",
@@ -336,6 +326,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
tid_agg_rx->buf_size = buf_size;
tid_agg_rx->timeout = timeout;
tid_agg_rx->stored_mpdu_num = 0;
+ tid_agg_rx->auto_seq = auto_seq;
status = WLAN_STATUS_SUCCESS;
/* activate it for RX */
@@ -350,6 +341,74 @@ end:
mutex_unlock(&sta->ampdu_mlme.mtx);
end_no_lock:
- ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
- dialog_token, status, 1, buf_size, timeout);
+ if (tx)
+ ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
+ dialog_token, status, 1, buf_size,
+ timeout);
+}
+
+void ieee80211_process_addba_request(struct ieee80211_local *local,
+ struct sta_info *sta,
+ struct ieee80211_mgmt *mgmt,
+ size_t len)
+{
+ u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
+ u8 dialog_token;
+
+ /* extract session parameters from addba request frame */
+ dialog_token = mgmt->u.action.u.addba_req.dialog_token;
+ timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
+ start_seq_num =
+ le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
+
+ capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+ ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
+ tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
+ buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
+
+ __ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
+ start_seq_num, ba_policy, tid,
+ buf_size, true, false);
+}
+
+void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif,
+ const u8 *addr, u16 tid)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_rx_agg *rx_agg;
+ struct sk_buff *skb = dev_alloc_skb(0);
+
+ if (unlikely(!skb))
+ return;
+
+ rx_agg = (struct ieee80211_rx_agg *) &skb->cb;
+ memcpy(&rx_agg->addr, addr, ETH_ALEN);
+ rx_agg->tid = tid;
+
+ skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_START;
+ skb_queue_tail(&sdata->skb_queue, skb);
+ ieee80211_queue_work(&local->hw, &sdata->work);
+}
+EXPORT_SYMBOL(ieee80211_start_rx_ba_session_offl);
+
+void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif,
+ const u8 *addr, u16 tid)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_rx_agg *rx_agg;
+ struct sk_buff *skb = dev_alloc_skb(0);
+
+ if (unlikely(!skb))
+ return;
+
+ rx_agg = (struct ieee80211_rx_agg *) &skb->cb;
+ memcpy(&rx_agg->addr, addr, ETH_ALEN);
+ rx_agg->tid = tid;
+
+ skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_STOP;
+ skb_queue_tail(&sdata->skb_queue, skb);
+ ieee80211_queue_work(&local->hw, &sdata->work);
}
+EXPORT_SYMBOL(ieee80211_stop_rx_ba_session_offl);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index ce9633a3cfb0..d6986f3aa5c4 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -170,10 +170,13 @@ ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid)
{
int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)];
+ /* we do refcounting here, so don't use the queue reason refcounting */
+
if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1)
ieee80211_stop_queue_by_reason(
&sdata->local->hw, queue,
- IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+ IEEE80211_QUEUE_STOP_REASON_AGGREGATION,
+ false);
__acquire(agg_queue);
}
@@ -185,7 +188,8 @@ ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid)
if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0)
ieee80211_wake_queue_by_reason(
&sdata->local->hw, queue,
- IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
+ IEEE80211_QUEUE_STOP_REASON_AGGREGATION,
+ false);
__release(agg_queue);
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 592f4b152ba8..fb6a1502b6df 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2,6 +2,7 @@
* mac80211 configuration hooks for cfg80211
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This file is GPLv2 as found in COPYING.
*/
@@ -468,330 +469,6 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH;
}
-static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
-{
- struct ieee80211_sub_if_data *sdata = sta->sdata;
- struct ieee80211_local *local = sdata->local;
- struct rate_control_ref *ref = NULL;
- struct timespec uptime;
- u64 packets = 0;
- u32 thr = 0;
- int i, ac;
-
- if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
- ref = local->rate_ctrl;
-
- sinfo->generation = sdata->local->sta_generation;
-
- sinfo->filled = STATION_INFO_INACTIVE_TIME |
- STATION_INFO_RX_BYTES64 |
- STATION_INFO_TX_BYTES64 |
- STATION_INFO_RX_PACKETS |
- STATION_INFO_TX_PACKETS |
- STATION_INFO_TX_RETRIES |
- STATION_INFO_TX_FAILED |
- STATION_INFO_TX_BITRATE |
- STATION_INFO_RX_BITRATE |
- STATION_INFO_RX_DROP_MISC |
- STATION_INFO_BSS_PARAM |
- STATION_INFO_CONNECTED_TIME |
- STATION_INFO_STA_FLAGS |
- STATION_INFO_BEACON_LOSS_COUNT;
-
- do_posix_clock_monotonic_gettime(&uptime);
- sinfo->connected_time = uptime.tv_sec - sta->last_connected;
-
- sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
- sinfo->tx_bytes = 0;
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- sinfo->tx_bytes += sta->tx_bytes[ac];
- packets += sta->tx_packets[ac];
- }
- sinfo->tx_packets = packets;
- sinfo->rx_bytes = sta->rx_bytes;
- sinfo->rx_packets = sta->rx_packets;
- sinfo->tx_retries = sta->tx_retry_count;
- sinfo->tx_failed = sta->tx_retry_failed;
- sinfo->rx_dropped_misc = sta->rx_dropped;
- sinfo->beacon_loss_count = sta->beacon_loss_count;
-
- if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
- (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
- sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
- if (!local->ops->get_rssi ||
- drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal))
- sinfo->signal = (s8)sta->last_signal;
- sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
- }
- if (sta->chains) {
- sinfo->filled |= STATION_INFO_CHAIN_SIGNAL |
- STATION_INFO_CHAIN_SIGNAL_AVG;
-
- sinfo->chains = sta->chains;
- for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
- sinfo->chain_signal[i] = sta->chain_signal_last[i];
- sinfo->chain_signal_avg[i] =
- (s8) -ewma_read(&sta->chain_signal_avg[i]);
- }
- }
-
- sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate);
- sta_set_rate_info_rx(sta, &sinfo->rxrate);
-
- if (ieee80211_vif_is_mesh(&sdata->vif)) {
-#ifdef CONFIG_MAC80211_MESH
- sinfo->filled |= STATION_INFO_LLID |
- STATION_INFO_PLID |
- STATION_INFO_PLINK_STATE |
- STATION_INFO_LOCAL_PM |
- STATION_INFO_PEER_PM |
- STATION_INFO_NONPEER_PM;
-
- sinfo->llid = sta->llid;
- sinfo->plid = sta->plid;
- sinfo->plink_state = sta->plink_state;
- if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) {
- sinfo->filled |= STATION_INFO_T_OFFSET;
- sinfo->t_offset = sta->t_offset;
- }
- sinfo->local_pm = sta->local_pm;
- sinfo->peer_pm = sta->peer_pm;
- sinfo->nonpeer_pm = sta->nonpeer_pm;
-#endif
- }
-
- sinfo->bss_param.flags = 0;
- if (sdata->vif.bss_conf.use_cts_prot)
- sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT;
- if (sdata->vif.bss_conf.use_short_preamble)
- sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE;
- if (sdata->vif.bss_conf.use_short_slot)
- sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME;
- sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period;
- sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int;
-
- sinfo->sta_flags.set = 0;
- sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) |
- BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
- BIT(NL80211_STA_FLAG_WME) |
- BIT(NL80211_STA_FLAG_MFP) |
- BIT(NL80211_STA_FLAG_AUTHENTICATED) |
- BIT(NL80211_STA_FLAG_ASSOCIATED) |
- BIT(NL80211_STA_FLAG_TDLS_PEER);
- if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
- sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED);
- if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE))
- sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE);
- if (test_sta_flag(sta, WLAN_STA_WME))
- sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME);
- if (test_sta_flag(sta, WLAN_STA_MFP))
- sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP);
- if (test_sta_flag(sta, WLAN_STA_AUTH))
- sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED);
- if (test_sta_flag(sta, WLAN_STA_ASSOC))
- sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
- if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
- sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
-
- /* check if the driver has a SW RC implementation */
- if (ref && ref->ops->get_expected_throughput)
- thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
- else
- thr = drv_get_expected_throughput(local, &sta->sta);
-
- if (thr != 0) {
- sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT;
- sinfo->expected_throughput = thr;
- }
-}
-
-static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
- "rx_packets", "rx_bytes", "wep_weak_iv_count",
- "rx_duplicates", "rx_fragments", "rx_dropped",
- "tx_packets", "tx_bytes", "tx_fragments",
- "tx_filtered", "tx_retry_failed", "tx_retries",
- "beacon_loss", "sta_state", "txrate", "rxrate", "signal",
- "channel", "noise", "ch_time", "ch_time_busy",
- "ch_time_ext_busy", "ch_time_rx", "ch_time_tx"
-};
-#define STA_STATS_LEN ARRAY_SIZE(ieee80211_gstrings_sta_stats)
-
-static int ieee80211_get_et_sset_count(struct wiphy *wiphy,
- struct net_device *dev,
- int sset)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- int rv = 0;
-
- if (sset == ETH_SS_STATS)
- rv += STA_STATS_LEN;
-
- rv += drv_get_et_sset_count(sdata, sset);
-
- if (rv == 0)
- return -EOPNOTSUPP;
- return rv;
-}
-
-static void ieee80211_get_et_stats(struct wiphy *wiphy,
- struct net_device *dev,
- struct ethtool_stats *stats,
- u64 *data)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_channel *channel;
- struct sta_info *sta;
- struct ieee80211_local *local = sdata->local;
- struct station_info sinfo;
- struct survey_info survey;
- int i, q;
-#define STA_STATS_SURVEY_LEN 7
-
- memset(data, 0, sizeof(u64) * STA_STATS_LEN);
-
-#define ADD_STA_STATS(sta) \
- do { \
- data[i++] += sta->rx_packets; \
- data[i++] += sta->rx_bytes; \
- data[i++] += sta->wep_weak_iv_count; \
- data[i++] += sta->num_duplicates; \
- data[i++] += sta->rx_fragments; \
- data[i++] += sta->rx_dropped; \
- \
- data[i++] += sinfo.tx_packets; \
- data[i++] += sinfo.tx_bytes; \
- data[i++] += sta->tx_fragments; \
- data[i++] += sta->tx_filtered_count; \
- data[i++] += sta->tx_retry_failed; \
- data[i++] += sta->tx_retry_count; \
- data[i++] += sta->beacon_loss_count; \
- } while (0)
-
- /* For Managed stations, find the single station based on BSSID
- * and use that. For interface types, iterate through all available
- * stations and add stats for any station that is assigned to this
- * network device.
- */
-
- mutex_lock(&local->sta_mtx);
-
- if (sdata->vif.type == NL80211_IFTYPE_STATION) {
- sta = sta_info_get_bss(sdata, sdata->u.mgd.bssid);
-
- if (!(sta && !WARN_ON(sta->sdata->dev != dev)))
- goto do_survey;
-
- sinfo.filled = 0;
- sta_set_sinfo(sta, &sinfo);
-
- i = 0;
- ADD_STA_STATS(sta);
-
- data[i++] = sta->sta_state;
-
-
- if (sinfo.filled & STATION_INFO_TX_BITRATE)
- data[i] = 100000 *
- cfg80211_calculate_bitrate(&sinfo.txrate);
- i++;
- if (sinfo.filled & STATION_INFO_RX_BITRATE)
- data[i] = 100000 *
- cfg80211_calculate_bitrate(&sinfo.rxrate);
- i++;
-
- if (sinfo.filled & STATION_INFO_SIGNAL_AVG)
- data[i] = (u8)sinfo.signal_avg;
- i++;
- } else {
- list_for_each_entry(sta, &local->sta_list, list) {
- /* Make sure this station belongs to the proper dev */
- if (sta->sdata->dev != dev)
- continue;
-
- sinfo.filled = 0;
- sta_set_sinfo(sta, &sinfo);
- i = 0;
- ADD_STA_STATS(sta);
- }
- }
-
-do_survey:
- i = STA_STATS_LEN - STA_STATS_SURVEY_LEN;
- /* Get survey stats for current channel */
- survey.filled = 0;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (chanctx_conf)
- channel = chanctx_conf->def.chan;
- else
- channel = NULL;
- rcu_read_unlock();
-
- if (channel) {
- q = 0;
- do {
- survey.filled = 0;
- if (drv_get_survey(local, q, &survey) != 0) {
- survey.filled = 0;
- break;
- }
- q++;
- } while (channel != survey.channel);
- }
-
- if (survey.filled)
- data[i++] = survey.channel->center_freq;
- else
- data[i++] = 0;
- if (survey.filled & SURVEY_INFO_NOISE_DBM)
- data[i++] = (u8)survey.noise;
- else
- data[i++] = -1LL;
- if (survey.filled & SURVEY_INFO_CHANNEL_TIME)
- data[i++] = survey.channel_time;
- else
- data[i++] = -1LL;
- if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY)
- data[i++] = survey.channel_time_busy;
- else
- data[i++] = -1LL;
- if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY)
- data[i++] = survey.channel_time_ext_busy;
- else
- data[i++] = -1LL;
- if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX)
- data[i++] = survey.channel_time_rx;
- else
- data[i++] = -1LL;
- if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX)
- data[i++] = survey.channel_time_tx;
- else
- data[i++] = -1LL;
-
- mutex_unlock(&local->sta_mtx);
-
- if (WARN_ON(i != STA_STATS_LEN))
- return;
-
- drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN]));
-}
-
-static void ieee80211_get_et_strings(struct wiphy *wiphy,
- struct net_device *dev,
- u32 sset, u8 *data)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- int sz_sta_stats = 0;
-
- if (sset == ETH_SS_STATS) {
- sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats);
- memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats);
- }
- drv_get_et_strings(sdata, sset, &(data[sz_sta_stats]));
-}
-
static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
int idx, u8 *mac, struct station_info *sinfo)
{
@@ -878,7 +555,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
}
static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
- const u8 *resp, size_t resp_len)
+ const u8 *resp, size_t resp_len,
+ const struct ieee80211_csa_settings *csa)
{
struct probe_resp *new, *old;
@@ -894,6 +572,11 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
new->len = resp_len;
memcpy(new->data, resp, resp_len);
+ if (csa)
+ memcpy(new->csa_counter_offsets, csa->counter_offsets_presp,
+ csa->n_counter_offsets_presp *
+ sizeof(new->csa_counter_offsets[0]));
+
rcu_assign_pointer(sdata->u.ap.probe_resp, new);
if (old)
kfree_rcu(old, rcu_head);
@@ -902,7 +585,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_beacon_data *params)
+ struct cfg80211_beacon_data *params,
+ const struct ieee80211_csa_settings *csa)
{
struct beacon_data *new, *old;
int new_head_len, new_tail_len;
@@ -946,6 +630,13 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
new->head_len = new_head_len;
new->tail_len = new_tail_len;
+ if (csa) {
+ new->csa_current_counter = csa->count;
+ memcpy(new->csa_counter_offsets, csa->counter_offsets_beacon,
+ csa->n_counter_offsets_beacon *
+ sizeof(new->csa_counter_offsets[0]));
+ }
+
/* copy in head */
if (params->head)
memcpy(new->head, params->head, new_head_len);
@@ -960,7 +651,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
memcpy(new->tail, old->tail, new_tail_len);
err = ieee80211_set_probe_resp(sdata, params->probe_resp,
- params->probe_resp_len);
+ params->probe_resp_len, csa);
if (err < 0)
return err;
if (err == 0)
@@ -992,8 +683,19 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (old)
return -EALREADY;
- /* TODO: make hostapd tell us what it wants */
- sdata->smps_mode = IEEE80211_SMPS_OFF;
+ switch (params->smps_mode) {
+ case NL80211_SMPS_OFF:
+ sdata->smps_mode = IEEE80211_SMPS_OFF;
+ break;
+ case NL80211_SMPS_STATIC:
+ sdata->smps_mode = IEEE80211_SMPS_STATIC;
+ break;
+ case NL80211_SMPS_DYNAMIC:
+ sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
+ break;
+ default:
+ return -EINVAL;
+ }
sdata->needed_rx_chains = sdata->local->rx_chains;
mutex_lock(&local->mtx);
@@ -1045,7 +747,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
IEEE80211_P2P_OPPPS_ENABLE_BIT;
- err = ieee80211_assign_beacon(sdata, &params->beacon);
+ err = ieee80211_assign_beacon(sdata, &params->beacon, NULL);
if (err < 0) {
ieee80211_vif_release_channel(sdata);
return err;
@@ -1093,38 +795,13 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
if (!old)
return -ENOENT;
- err = ieee80211_assign_beacon(sdata, params);
+ err = ieee80211_assign_beacon(sdata, params, NULL);
if (err < 0)
return err;
ieee80211_bss_info_change_notify(sdata, err);
return 0;
}
-bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local)
-{
- struct ieee80211_sub_if_data *sdata;
-
- lockdep_assert_held(&local->mtx);
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata))
- continue;
-
- if (!sdata->vif.csa_active)
- continue;
-
- if (!sdata->csa_block_tx)
- continue;
-
- rcu_read_unlock();
- return true;
- }
- rcu_read_unlock();
-
- return false;
-}
-
static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1144,10 +821,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
/* abort any running channel switch */
mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
- if (!ieee80211_csa_needs_block_tx(local))
- ieee80211_wake_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ if (sdata->csa_block_tx) {
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_block_tx = false;
+ }
+
mutex_unlock(&local->mtx);
kfree(sdata->u.ap.next_beacon);
@@ -1330,9 +1009,12 @@ static int sta_apply_parameters(struct ieee80211_local *local,
}
}
- ret = sta_apply_auth_flags(local, sta, mask, set);
- if (ret)
- return ret;
+ /* auth flags will be set later for TDLS stations */
+ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ ret = sta_apply_auth_flags(local, sta, mask, set);
+ if (ret)
+ return ret;
+ }
if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) {
if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE))
@@ -1341,15 +1023,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE);
}
- if (mask & BIT(NL80211_STA_FLAG_WME)) {
- if (set & BIT(NL80211_STA_FLAG_WME)) {
- set_sta_flag(sta, WLAN_STA_WME);
- sta->sta.wme = true;
- } else {
- clear_sta_flag(sta, WLAN_STA_WME);
- sta->sta.wme = false;
- }
- }
+ if (mask & BIT(NL80211_STA_FLAG_WME))
+ sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
if (mask & BIT(NL80211_STA_FLAG_MFP)) {
if (set & BIT(NL80211_STA_FLAG_MFP))
@@ -1469,6 +1144,13 @@ static int sta_apply_parameters(struct ieee80211_local *local,
#endif
}
+ /* set the STA state after all sta info from usermode has been set */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ ret = sta_apply_auth_flags(local, sta, mask, set);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
@@ -2307,8 +1989,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
return err;
}
- if (changed & WIPHY_PARAM_COVERAGE_CLASS) {
- err = drv_set_coverage_class(local, wiphy->coverage_class);
+ if ((changed & WIPHY_PARAM_COVERAGE_CLASS) ||
+ (changed & WIPHY_PARAM_DYN_ACK)) {
+ s16 coverage_class;
+
+ coverage_class = changed & WIPHY_PARAM_COVERAGE_CLASS ?
+ wiphy->coverage_class : -1;
+ err = drv_set_coverage_class(local, coverage_class);
if (err)
return err;
@@ -2681,6 +2368,58 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return 0;
}
+static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local,
+ struct ieee80211_roc_work *new_roc,
+ struct ieee80211_roc_work *cur_roc)
+{
+ unsigned long j = jiffies;
+ unsigned long cur_roc_end = cur_roc->hw_start_time +
+ msecs_to_jiffies(cur_roc->duration);
+ struct ieee80211_roc_work *next_roc;
+ int new_dur;
+
+ if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun))
+ return false;
+
+ if (time_after(j + IEEE80211_ROC_MIN_LEFT, cur_roc_end))
+ return false;
+
+ ieee80211_handle_roc_started(new_roc);
+
+ new_dur = new_roc->duration - jiffies_to_msecs(cur_roc_end - j);
+
+ /* cur_roc is long enough - add new_roc to the dependents list. */
+ if (new_dur <= 0) {
+ list_add_tail(&new_roc->list, &cur_roc->dependents);
+ return true;
+ }
+
+ new_roc->duration = new_dur;
+
+ /*
+ * if cur_roc was already coalesced before, we might
+ * want to extend the next roc instead of adding
+ * a new one.
+ */
+ next_roc = list_entry(cur_roc->list.next,
+ struct ieee80211_roc_work, list);
+ if (&next_roc->list != &local->roc_list &&
+ next_roc->chan == new_roc->chan &&
+ next_roc->sdata == new_roc->sdata &&
+ !WARN_ON(next_roc->started)) {
+ list_add_tail(&new_roc->list, &next_roc->dependents);
+ next_roc->duration = max(next_roc->duration,
+ new_roc->duration);
+ next_roc->type = max(next_roc->type, new_roc->type);
+ return true;
+ }
+
+ /* add right after cur_roc */
+ list_add(&new_roc->list, &cur_roc->list);
+
+ return true;
+}
+
static int ieee80211_start_roc_work(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel,
@@ -2786,8 +2525,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
/* If it has already started, it's more difficult ... */
if (local->ops->remain_on_channel) {
- unsigned long j = jiffies;
-
/*
* In the offloaded ROC case, if it hasn't begun, add
* this new one to the dependent list to be handled
@@ -2810,28 +2547,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
break;
}
- if (time_before(j + IEEE80211_ROC_MIN_LEFT,
- tmp->hw_start_time +
- msecs_to_jiffies(tmp->duration))) {
- int new_dur;
-
- ieee80211_handle_roc_started(roc);
-
- new_dur = roc->duration -
- jiffies_to_msecs(tmp->hw_start_time +
- msecs_to_jiffies(
- tmp->duration) -
- j);
-
- if (new_dur > 0) {
- /* add right after tmp */
- list_add(&roc->list, &tmp->list);
- } else {
- list_add_tail(&roc->list,
- &tmp->dependents);
- }
+ if (ieee80211_coalesce_started_roc(local, roc, tmp))
queued = true;
- }
} else if (del_timer_sync(&tmp->work.timer)) {
unsigned long new_end;
@@ -3076,7 +2793,8 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
+ err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
+ NULL);
kfree(sdata->u.ap.next_beacon);
sdata->u.ap.next_beacon = NULL;
@@ -3114,17 +2832,35 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
sdata_assert_lock(sdata);
lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
- sdata->radar_required = sdata->csa_radar_required;
- err = ieee80211_vif_change_channel(sdata, &changed);
- if (err < 0)
- return err;
+ /*
+ * using reservation isn't immediate as it may be deferred until later
+ * with multi-vif. once reservation is complete it will re-schedule the
+ * work with no reserved_chanctx so verify chandef to check if it
+ * completed successfully
+ */
- if (!local->use_chanctx) {
- local->_oper_chandef = sdata->csa_chandef;
- ieee80211_hw_config(local, 0);
+ if (sdata->reserved_chanctx) {
+ /*
+ * with multi-vif csa driver may call ieee80211_csa_finish()
+ * many times while waiting for other interfaces to use their
+ * reservations
+ */
+ if (sdata->reserved_ready)
+ return 0;
+
+ err = ieee80211_vif_use_reserved_context(sdata);
+ if (err)
+ return err;
+
+ return 0;
}
+ if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef,
+ &sdata->csa_chandef))
+ return -EINVAL;
+
sdata->vif.csa_active = false;
err = ieee80211_set_after_csa_beacon(sdata, &changed);
@@ -3134,10 +2870,11 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
ieee80211_bss_info_change_notify(sdata, changed);
cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
- if (!ieee80211_csa_needs_block_tx(local))
- ieee80211_wake_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ if (sdata->csa_block_tx) {
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_block_tx = false;
+ }
return 0;
}
@@ -3160,6 +2897,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
sdata_lock(sdata);
mutex_lock(&local->mtx);
+ mutex_lock(&local->chanctx_mtx);
/* AP might have been stopped while waiting for the lock. */
if (!sdata->vif.csa_active)
@@ -3171,6 +2909,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
ieee80211_csa_finalize(sdata);
unlock:
+ mutex_unlock(&local->chanctx_mtx);
mutex_unlock(&local->mtx);
sdata_unlock(sdata);
}
@@ -3179,6 +2918,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
struct cfg80211_csa_settings *params,
u32 *changed)
{
+ struct ieee80211_csa_settings csa = {};
int err;
switch (sdata->vif.type) {
@@ -3213,20 +2953,13 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
IEEE80211_MAX_CSA_COUNTERS_NUM))
return -EINVAL;
- /* make sure we don't have garbage in other counters */
- memset(sdata->csa_counter_offset_beacon, 0,
- sizeof(sdata->csa_counter_offset_beacon));
- memset(sdata->csa_counter_offset_presp, 0,
- sizeof(sdata->csa_counter_offset_presp));
+ csa.counter_offsets_beacon = params->counter_offsets_beacon;
+ csa.counter_offsets_presp = params->counter_offsets_presp;
+ csa.n_counter_offsets_beacon = params->n_counter_offsets_beacon;
+ csa.n_counter_offsets_presp = params->n_counter_offsets_presp;
+ csa.count = params->count;
- memcpy(sdata->csa_counter_offset_beacon,
- params->counter_offsets_beacon,
- params->n_counter_offsets_beacon * sizeof(u16));
- memcpy(sdata->csa_counter_offset_presp,
- params->counter_offsets_presp,
- params->n_counter_offsets_presp * sizeof(u16));
-
- err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
+ err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa);
if (err < 0) {
kfree(sdata->u.ap.next_beacon);
return err;
@@ -3322,7 +3055,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
- int err, num_chanctx, changed = 0;
+ int err, changed = 0;
sdata_assert_lock(sdata);
lockdep_assert_held(&local->mtx);
@@ -3337,46 +3070,50 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
&sdata->vif.bss_conf.chandef))
return -EINVAL;
+ /* don't allow another channel switch if one is already active. */
+ if (sdata->vif.csa_active)
+ return -EBUSY;
+
mutex_lock(&local->chanctx_mtx);
conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (!conf) {
- mutex_unlock(&local->chanctx_mtx);
- return -EBUSY;
+ err = -EBUSY;
+ goto out;
}
- /* don't handle for multi-VIF cases */
chanctx = container_of(conf, struct ieee80211_chanctx, conf);
- if (ieee80211_chanctx_refcount(local, chanctx) > 1) {
- mutex_unlock(&local->chanctx_mtx);
- return -EBUSY;
+ if (!chanctx) {
+ err = -EBUSY;
+ goto out;
}
- num_chanctx = 0;
- list_for_each_entry_rcu(chanctx, &local->chanctx_list, list)
- num_chanctx++;
- mutex_unlock(&local->chanctx_mtx);
- if (num_chanctx > 1)
- return -EBUSY;
+ err = ieee80211_vif_reserve_chanctx(sdata, &params->chandef,
+ chanctx->mode,
+ params->radar_required);
+ if (err)
+ goto out;
- /* don't allow another channel switch if one is already active. */
- if (sdata->vif.csa_active)
- return -EBUSY;
+ /* if reservation is invalid then this will fail */
+ err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0);
+ if (err) {
+ ieee80211_vif_unreserve_chanctx(sdata);
+ goto out;
+ }
err = ieee80211_set_csa_beacon(sdata, params, &changed);
- if (err)
- return err;
+ if (err) {
+ ieee80211_vif_unreserve_chanctx(sdata);
+ goto out;
+ }
- sdata->csa_radar_required = params->radar_required;
sdata->csa_chandef = params->chandef;
sdata->csa_block_tx = params->block_tx;
- sdata->csa_current_counter = params->count;
sdata->vif.csa_active = true;
if (sdata->csa_block_tx)
- ieee80211_stop_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
if (changed) {
ieee80211_bss_info_change_notify(sdata, changed);
@@ -3386,7 +3123,9 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
ieee80211_csa_finalize(sdata);
}
- return 0;
+out:
+ mutex_unlock(&local->chanctx_mtx);
+ return err;
}
int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
@@ -3518,10 +3257,23 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
params->n_csa_offsets) {
int i;
- u8 c = sdata->csa_current_counter;
+ struct beacon_data *beacon = NULL;
+
+ rcu_read_lock();
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ beacon = rcu_dereference(sdata->u.ap.beacon);
+ else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ beacon = rcu_dereference(sdata->u.ibss.presp);
+ else if (ieee80211_vif_is_mesh(&sdata->vif))
+ beacon = rcu_dereference(sdata->u.mesh.beacon);
- for (i = 0; i < params->n_csa_offsets; i++)
- data[params->csa_offsets[i]] = c;
+ if (beacon)
+ for (i = 0; i < params->n_csa_offsets; i++)
+ data[params->csa_offsets[i]] =
+ beacon->csa_current_counter;
+
+ rcu_read_unlock();
}
IEEE80211_SKB_CB(skb)->flags = flags;
@@ -3601,21 +3353,6 @@ static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
return drv_get_antenna(local, tx_ant, rx_ant);
}
-static int ieee80211_set_ringparam(struct wiphy *wiphy, u32 tx, u32 rx)
-{
- struct ieee80211_local *local = wiphy_priv(wiphy);
-
- return drv_set_ringparam(local, tx, rx);
-}
-
-static void ieee80211_get_ringparam(struct wiphy *wiphy,
- u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max)
-{
- struct ieee80211_local *local = wiphy_priv(wiphy);
-
- drv_get_ringparam(local, tx, tx_max, rx, rx_max);
-}
-
static int ieee80211_set_rekey_data(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_gtk_rekey_data *data)
@@ -3655,7 +3392,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
band = chanctx_conf->def.chan->band;
sta = sta_info_get_bss(sdata, peer);
if (sta) {
- qos = test_sta_flag(sta, WLAN_STA_WME);
+ qos = sta->sta.wme;
} else {
rcu_read_unlock();
return -ENOLINK;
@@ -3847,8 +3584,6 @@ const struct cfg80211_ops mac80211_config_ops = {
.mgmt_frame_register = ieee80211_mgmt_frame_register,
.set_antenna = ieee80211_set_antenna,
.get_antenna = ieee80211_get_antenna,
- .set_ringparam = ieee80211_set_ringparam,
- .get_ringparam = ieee80211_get_ringparam,
.set_rekey_data = ieee80211_set_rekey_data,
.tdls_oper = ieee80211_tdls_oper,
.tdls_mgmt = ieee80211_tdls_mgmt,
@@ -3857,9 +3592,6 @@ const struct cfg80211_ops mac80211_config_ops = {
#ifdef CONFIG_PM
.set_wakeup = ieee80211_set_wakeup,
#endif
- .get_et_sset_count = ieee80211_get_et_sset_count,
- .get_et_stats = ieee80211_get_et_stats,
- .get_et_strings = ieee80211_get_et_strings,
.get_channel = ieee80211_cfg_get_channel,
.start_radar_detection = ieee80211_start_radar_detection,
.channel_switch = ieee80211_channel_switch,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index a310e33972de..4c74e8da64b9 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -63,6 +63,20 @@ static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local)
return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local);
}
+static struct ieee80211_chanctx *
+ieee80211_vif_get_chanctx(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local __maybe_unused = sdata->local;
+ struct ieee80211_chanctx_conf *conf;
+
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (!conf)
+ return NULL;
+
+ return container_of(conf, struct ieee80211_chanctx, conf);
+}
+
static const struct cfg80211_chan_def *
ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
@@ -160,6 +174,9 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
return NULL;
list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
+ continue;
+
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
continue;
@@ -347,6 +364,9 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
list_for_each_entry(ctx, &local->chanctx_list, list) {
const struct cfg80211_chan_def *compat;
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACE_NONE)
+ continue;
+
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
continue;
@@ -521,18 +541,20 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
continue;
if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf)
continue;
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ continue;
if (!compat)
compat = &sdata->vif.bss_conf.chandef;
compat = cfg80211_chandef_compatible(
&sdata->vif.bss_conf.chandef, compat);
- if (!compat)
+ if (WARN_ON_ONCE(!compat))
break;
}
rcu_read_unlock();
- if (WARN_ON_ONCE(!compat))
+ if (!compat)
return;
ieee80211_change_chanctx(local, ctx, compat);
@@ -617,29 +639,6 @@ out:
return ret;
}
-static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx_conf *conf;
- struct ieee80211_chanctx *ctx;
-
- lockdep_assert_held(&local->chanctx_mtx);
-
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (!conf)
- return;
-
- ctx = container_of(conf, struct ieee80211_chanctx, conf);
-
- if (sdata->reserved_chanctx)
- ieee80211_vif_unreserve_chanctx(sdata);
-
- ieee80211_assign_vif_chanctx(sdata, NULL);
- if (ieee80211_chanctx_refcount(local, ctx) == 0)
- ieee80211_free_chanctx(local, ctx);
-}
-
void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *chanctx)
{
@@ -730,127 +729,6 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RX_CHAINS);
}
-int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx *ctx;
- u8 radar_detect_width = 0;
- int ret;
-
- lockdep_assert_held(&local->mtx);
-
- WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
-
- mutex_lock(&local->chanctx_mtx);
-
- ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
- chandef,
- sdata->wdev.iftype);
- if (ret < 0)
- goto out;
- if (ret > 0)
- radar_detect_width = BIT(chandef->width);
-
- sdata->radar_required = ret;
-
- ret = ieee80211_check_combinations(sdata, chandef, mode,
- radar_detect_width);
- if (ret < 0)
- goto out;
-
- __ieee80211_vif_release_channel(sdata);
-
- ctx = ieee80211_find_chanctx(local, chandef, mode);
- if (!ctx)
- ctx = ieee80211_new_chanctx(local, chandef, mode);
- if (IS_ERR(ctx)) {
- ret = PTR_ERR(ctx);
- goto out;
- }
-
- sdata->vif.bss_conf.chandef = *chandef;
-
- ret = ieee80211_assign_vif_chanctx(sdata, ctx);
- if (ret) {
- /* if assign fails refcount stays the same */
- if (ieee80211_chanctx_refcount(local, ctx) == 0)
- ieee80211_free_chanctx(local, ctx);
- goto out;
- }
-
- ieee80211_recalc_smps_chanctx(local, ctx);
- ieee80211_recalc_radar_chanctx(local, ctx);
- out:
- mutex_unlock(&local->chanctx_mtx);
- return ret;
-}
-
-static int __ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *ctx,
- u32 *changed)
-{
- struct ieee80211_local *local = sdata->local;
- const struct cfg80211_chan_def *chandef = &sdata->csa_chandef;
- u32 chanctx_changed = 0;
-
- if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
- IEEE80211_CHAN_DISABLED))
- return -EINVAL;
-
- if (ieee80211_chanctx_refcount(local, ctx) != 1)
- return -EINVAL;
-
- if (sdata->vif.bss_conf.chandef.width != chandef->width) {
- chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
- *changed |= BSS_CHANGED_BANDWIDTH;
- }
-
- sdata->vif.bss_conf.chandef = *chandef;
- ctx->conf.def = *chandef;
-
- chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
- drv_change_chanctx(local, ctx, chanctx_changed);
-
- ieee80211_recalc_chanctx_chantype(local, ctx);
- ieee80211_recalc_smps_chanctx(local, ctx);
- ieee80211_recalc_radar_chanctx(local, ctx);
- ieee80211_recalc_chanctx_min_def(local, ctx);
-
- return 0;
-}
-
-int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
- u32 *changed)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx_conf *conf;
- struct ieee80211_chanctx *ctx;
- int ret;
-
- lockdep_assert_held(&local->mtx);
-
- /* should never be called if not performing a channel switch. */
- if (WARN_ON(!sdata->vif.csa_active))
- return -EINVAL;
-
- mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (!conf) {
- ret = -EINVAL;
- goto out;
- }
-
- ctx = container_of(conf, struct ieee80211_chanctx, conf);
-
- ret = __ieee80211_vif_change_channel(sdata, ctx, changed);
- out:
- mutex_unlock(&local->chanctx_mtx);
- return ret;
-}
-
static void
__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
bool clear)
@@ -905,8 +783,25 @@ int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata)
list_del(&sdata->reserved_chanctx_list);
sdata->reserved_chanctx = NULL;
- if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0)
- ieee80211_free_chanctx(sdata->local, ctx);
+ if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0) {
+ if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) {
+ if (WARN_ON(!ctx->replace_ctx))
+ return -EINVAL;
+
+ WARN_ON(ctx->replace_ctx->replace_state !=
+ IEEE80211_CHANCTX_WILL_BE_REPLACED);
+ WARN_ON(ctx->replace_ctx->replace_ctx != ctx);
+
+ ctx->replace_ctx->replace_ctx = NULL;
+ ctx->replace_ctx->replace_state =
+ IEEE80211_CHANCTX_REPLACE_NONE;
+
+ list_del_rcu(&ctx->list);
+ kfree_rcu(ctx, rcu_head);
+ } else {
+ ieee80211_free_chanctx(sdata->local, ctx);
+ }
+ }
return 0;
}
@@ -917,40 +812,84 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
bool radar_required)
{
struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx_conf *conf;
- struct ieee80211_chanctx *new_ctx, *curr_ctx;
- int ret = 0;
-
- mutex_lock(&local->chanctx_mtx);
+ struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx;
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (!conf) {
- ret = -EINVAL;
- goto out;
- }
+ lockdep_assert_held(&local->chanctx_mtx);
- curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
+ curr_ctx = ieee80211_vif_get_chanctx(sdata);
+ if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx)
+ return -ENOTSUPP;
new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
if (!new_ctx) {
- if (ieee80211_chanctx_refcount(local, curr_ctx) == 1 &&
- (local->hw.flags & IEEE80211_HW_CHANGE_RUNNING_CHANCTX)) {
- /* if we're the only users of the chanctx and
- * the driver supports changing a running
- * context, reserve our current context
- */
- new_ctx = curr_ctx;
- } else if (ieee80211_can_create_new_chanctx(local)) {
- /* create a new context and reserve it */
+ if (ieee80211_can_create_new_chanctx(local)) {
new_ctx = ieee80211_new_chanctx(local, chandef, mode);
- if (IS_ERR(new_ctx)) {
- ret = PTR_ERR(new_ctx);
- goto out;
- }
+ if (IS_ERR(new_ctx))
+ return PTR_ERR(new_ctx);
} else {
- ret = -EBUSY;
- goto out;
+ if (!curr_ctx ||
+ (curr_ctx->replace_state ==
+ IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
+ !list_empty(&curr_ctx->reserved_vifs)) {
+ /*
+ * Another vif already requested this context
+ * for a reservation. Find another one hoping
+ * all vifs assigned to it will also switch
+ * soon enough.
+ *
+ * TODO: This needs a little more work as some
+ * cases (more than 2 chanctx capable devices)
+ * may fail which could otherwise succeed
+ * provided some channel context juggling was
+ * performed.
+ *
+ * Consider ctx1..3, vif1..6, each ctx has 2
+ * vifs. vif1 and vif2 from ctx1 request new
+ * different chandefs starting 2 in-place
+ * reserations with ctx4 and ctx5 replacing
+ * ctx1 and ctx2 respectively. Next vif5 and
+ * vif6 from ctx3 reserve ctx4. If vif3 and
+ * vif4 remain on ctx2 as they are then this
+ * fails unless `replace_ctx` from ctx5 is
+ * replaced with ctx3.
+ */
+ list_for_each_entry(ctx, &local->chanctx_list,
+ list) {
+ if (ctx->replace_state !=
+ IEEE80211_CHANCTX_REPLACE_NONE)
+ continue;
+
+ if (!list_empty(&ctx->reserved_vifs))
+ continue;
+
+ curr_ctx = ctx;
+ break;
+ }
+ }
+
+ /*
+ * If that's true then all available contexts already
+ * have reservations and cannot be used.
+ */
+ if (!curr_ctx ||
+ (curr_ctx->replace_state ==
+ IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
+ !list_empty(&curr_ctx->reserved_vifs))
+ return -EBUSY;
+
+ new_ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+ if (!new_ctx)
+ return -ENOMEM;
+
+ new_ctx->replace_ctx = curr_ctx;
+ new_ctx->replace_state =
+ IEEE80211_CHANCTX_REPLACES_OTHER;
+
+ curr_ctx->replace_ctx = new_ctx;
+ curr_ctx->replace_state =
+ IEEE80211_CHANCTX_WILL_BE_REPLACED;
+
+ list_add_rcu(&new_ctx->list, &local->chanctx_list);
}
}
@@ -958,84 +897,694 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
sdata->reserved_chanctx = new_ctx;
sdata->reserved_chandef = *chandef;
sdata->reserved_radar_required = radar_required;
-out:
- mutex_unlock(&local->chanctx_mtx);
- return ret;
+ sdata->reserved_ready = false;
+
+ return 0;
}
-int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
- u32 *changed)
+static void
+ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata)
{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx *ctx;
- struct ieee80211_chanctx *old_ctx;
- struct ieee80211_chanctx_conf *conf;
- int ret;
- u32 tmp_changed = *changed;
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_ADHOC:
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_MESH_POINT:
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->csa_finalize_work);
+ break;
+ case NL80211_IFTYPE_STATION:
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->u.mgd.chswitch_work);
+ break;
+ case NL80211_IFTYPE_UNSPECIFIED:
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_WDS:
+ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_P2P_DEVICE:
+ case NUM_NL80211_IFTYPES:
+ WARN_ON(1);
+ break;
+ }
+}
- /* TODO: need to recheck if the chandef is usable etc.? */
+static int
+ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_vif_chanctx_switch vif_chsw[1] = {};
+ struct ieee80211_chanctx *old_ctx, *new_ctx;
+ const struct cfg80211_chan_def *chandef;
+ u32 changed = 0;
+ int err;
lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
- mutex_lock(&local->chanctx_mtx);
+ new_ctx = sdata->reserved_chanctx;
+ old_ctx = ieee80211_vif_get_chanctx(sdata);
- ctx = sdata->reserved_chanctx;
- if (WARN_ON(!ctx)) {
- ret = -EINVAL;
- goto out;
- }
+ if (WARN_ON(!sdata->reserved_ready))
+ return -EBUSY;
+
+ if (WARN_ON(!new_ctx))
+ return -EINVAL;
+
+ if (WARN_ON(!old_ctx))
+ return -EINVAL;
+
+ if (WARN_ON(new_ctx->replace_state ==
+ IEEE80211_CHANCTX_REPLACES_OTHER))
+ return -EINVAL;
+
+ chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
+ &sdata->reserved_chandef);
+ if (WARN_ON(!chandef))
+ return -EINVAL;
+
+ vif_chsw[0].vif = &sdata->vif;
+ vif_chsw[0].old_ctx = &old_ctx->conf;
+ vif_chsw[0].new_ctx = &new_ctx->conf;
+
+ list_del(&sdata->reserved_chanctx_list);
+ sdata->reserved_chanctx = NULL;
+
+ err = drv_switch_vif_chanctx(local, vif_chsw, 1,
+ CHANCTX_SWMODE_REASSIGN_VIF);
+ if (err) {
+ if (ieee80211_chanctx_refcount(local, new_ctx) == 0)
+ ieee80211_free_chanctx(local, new_ctx);
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (!conf) {
- ret = -EINVAL;
goto out;
}
- old_ctx = container_of(conf, struct ieee80211_chanctx, conf);
+ list_move(&sdata->assigned_chanctx_list, &new_ctx->assigned_vifs);
+ rcu_assign_pointer(sdata->vif.chanctx_conf, &new_ctx->conf);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ __ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+
+ if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
+ ieee80211_free_chanctx(local, old_ctx);
if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
- tmp_changed |= BSS_CHANGED_BANDWIDTH;
+ changed = BSS_CHANGED_BANDWIDTH;
sdata->vif.bss_conf.chandef = sdata->reserved_chandef;
- /* unref our reservation */
- sdata->reserved_chanctx = NULL;
- sdata->radar_required = sdata->reserved_radar_required;
+ if (changed)
+ ieee80211_bss_info_change_notify(sdata, changed);
+
+out:
+ ieee80211_vif_chanctx_reservation_complete(sdata);
+ return err;
+}
+
+static int
+ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx *old_ctx, *new_ctx;
+ const struct cfg80211_chan_def *chandef;
+ int err;
+
+ old_ctx = ieee80211_vif_get_chanctx(sdata);
+ new_ctx = sdata->reserved_chanctx;
+
+ if (WARN_ON(!sdata->reserved_ready))
+ return -EINVAL;
+
+ if (WARN_ON(old_ctx))
+ return -EINVAL;
+
+ if (WARN_ON(!new_ctx))
+ return -EINVAL;
+
+ if (WARN_ON(new_ctx->replace_state ==
+ IEEE80211_CHANCTX_REPLACES_OTHER))
+ return -EINVAL;
+
+ chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
+ &sdata->reserved_chandef);
+ if (WARN_ON(!chandef))
+ return -EINVAL;
+
list_del(&sdata->reserved_chanctx_list);
+ sdata->reserved_chanctx = NULL;
- if (old_ctx == ctx) {
- /* This is our own context, just change it */
- ret = __ieee80211_vif_change_channel(sdata, old_ctx,
- &tmp_changed);
- if (ret)
+ err = ieee80211_assign_vif_chanctx(sdata, new_ctx);
+ if (err) {
+ if (ieee80211_chanctx_refcount(local, new_ctx) == 0)
+ ieee80211_free_chanctx(local, new_ctx);
+
+ goto out;
+ }
+
+out:
+ ieee80211_vif_chanctx_reservation_complete(sdata);
+ return err;
+}
+
+static bool
+ieee80211_vif_has_in_place_reservation(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_chanctx *old_ctx, *new_ctx;
+
+ lockdep_assert_held(&sdata->local->chanctx_mtx);
+
+ new_ctx = sdata->reserved_chanctx;
+ old_ctx = ieee80211_vif_get_chanctx(sdata);
+
+ if (!old_ctx)
+ return false;
+
+ if (WARN_ON(!new_ctx))
+ return false;
+
+ if (old_ctx->replace_state != IEEE80211_CHANCTX_WILL_BE_REPLACED)
+ return false;
+
+ if (new_ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ return false;
+
+ return true;
+}
+
+static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local,
+ struct ieee80211_chanctx *new_ctx)
+{
+ const struct cfg80211_chan_def *chandef;
+
+ lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL);
+ if (WARN_ON(!chandef))
+ return -EINVAL;
+
+ local->hw.conf.radar_enabled = new_ctx->conf.radar_enabled;
+ local->_oper_chandef = *chandef;
+ ieee80211_hw_config(local, 0);
+
+ return 0;
+}
+
+static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
+ int n_vifs)
+{
+ struct ieee80211_vif_chanctx_switch *vif_chsw;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_chanctx *ctx, *old_ctx;
+ int i, err;
+
+ lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ vif_chsw = kzalloc(sizeof(vif_chsw[0]) * n_vifs, GFP_KERNEL);
+ if (!vif_chsw)
+ return -ENOMEM;
+
+ i = 0;
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ continue;
+
+ if (WARN_ON(!ctx->replace_ctx)) {
+ err = -EINVAL;
goto out;
+ }
+
+ list_for_each_entry(sdata, &ctx->reserved_vifs,
+ reserved_chanctx_list) {
+ if (!ieee80211_vif_has_in_place_reservation(
+ sdata))
+ continue;
+
+ old_ctx = ieee80211_vif_get_chanctx(sdata);
+ vif_chsw[i].vif = &sdata->vif;
+ vif_chsw[i].old_ctx = &old_ctx->conf;
+ vif_chsw[i].new_ctx = &ctx->conf;
+
+ i++;
+ }
+ }
+
+ err = drv_switch_vif_chanctx(local, vif_chsw, n_vifs,
+ CHANCTX_SWMODE_SWAP_CONTEXTS);
+
+out:
+ kfree(vif_chsw);
+ return err;
+}
+
+static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local)
+{
+ struct ieee80211_chanctx *ctx;
+ int err;
+
+ lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ continue;
+
+ if (!list_empty(&ctx->replace_ctx->assigned_vifs))
+ continue;
+
+ ieee80211_del_chanctx(local, ctx->replace_ctx);
+ err = ieee80211_add_chanctx(local, ctx);
+ if (err)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ WARN_ON(ieee80211_add_chanctx(local, ctx));
+ list_for_each_entry_continue_reverse(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ continue;
+
+ if (!list_empty(&ctx->replace_ctx->assigned_vifs))
+ continue;
+
+ ieee80211_del_chanctx(local, ctx);
+ WARN_ON(ieee80211_add_chanctx(local, ctx->replace_ctx));
+ }
+
+ return err;
+}
+
+static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
+{
+ struct ieee80211_sub_if_data *sdata, *sdata_tmp;
+ struct ieee80211_chanctx *ctx, *ctx_tmp, *old_ctx;
+ struct ieee80211_chanctx *new_ctx = NULL;
+ int i, err, n_assigned, n_reserved, n_ready;
+ int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0;
+
+ lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ /*
+ * If there are 2 independent pairs of channel contexts performing
+ * cross-switch of their vifs this code will still wait until both are
+ * ready even though it could be possible to switch one before the
+ * other is ready.
+ *
+ * For practical reasons and code simplicity just do a single huge
+ * switch.
+ */
+
+ /*
+ * Verify if the reservation is still feasible.
+ * - if it's not then disconnect
+ * - if it is but not all vifs necessary are ready then defer
+ */
+
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ continue;
+
+ if (WARN_ON(!ctx->replace_ctx)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (!local->use_chanctx)
+ new_ctx = ctx;
+
+ n_ctx++;
+
+ n_assigned = 0;
+ n_reserved = 0;
+ n_ready = 0;
+
+ list_for_each_entry(sdata, &ctx->replace_ctx->assigned_vifs,
+ assigned_chanctx_list) {
+ n_assigned++;
+ if (sdata->reserved_chanctx) {
+ n_reserved++;
+ if (sdata->reserved_ready)
+ n_ready++;
+ }
+ }
+
+ if (n_assigned != n_reserved) {
+ if (n_ready == n_reserved) {
+ wiphy_info(local->hw.wiphy,
+ "channel context reservation cannot be finalized because some interfaces aren't switching\n");
+ err = -EBUSY;
+ goto err;
+ }
+
+ return -EAGAIN;
+ }
+
+ ctx->conf.radar_enabled = false;
+ list_for_each_entry(sdata, &ctx->reserved_vifs,
+ reserved_chanctx_list) {
+ if (ieee80211_vif_has_in_place_reservation(sdata) &&
+ !sdata->reserved_ready)
+ return -EAGAIN;
+
+ old_ctx = ieee80211_vif_get_chanctx(sdata);
+ if (old_ctx) {
+ if (old_ctx->replace_state ==
+ IEEE80211_CHANCTX_WILL_BE_REPLACED)
+ n_vifs_switch++;
+ else
+ n_vifs_assign++;
+ } else {
+ n_vifs_ctxless++;
+ }
+
+ if (sdata->reserved_radar_required)
+ ctx->conf.radar_enabled = true;
+ }
+ }
+
+ if (WARN_ON(n_ctx == 0) ||
+ WARN_ON(n_vifs_switch == 0 &&
+ n_vifs_assign == 0 &&
+ n_vifs_ctxless == 0) ||
+ WARN_ON(n_ctx > 1 && !local->use_chanctx) ||
+ WARN_ON(!new_ctx && !local->use_chanctx)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * All necessary vifs are ready. Perform the switch now depending on
+ * reservations and driver capabilities.
+ */
+
+ if (local->use_chanctx) {
+ if (n_vifs_switch > 0) {
+ err = ieee80211_chsw_switch_vifs(local, n_vifs_switch);
+ if (err)
+ goto err;
+ }
+
+ if (n_vifs_assign > 0 || n_vifs_ctxless > 0) {
+ err = ieee80211_chsw_switch_ctxs(local);
+ if (err)
+ goto err;
+ }
} else {
- ret = ieee80211_assign_vif_chanctx(sdata, ctx);
- if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
- ieee80211_free_chanctx(local, old_ctx);
- if (ret) {
- /* if assign fails refcount stays the same */
- if (ieee80211_chanctx_refcount(local, ctx) == 0)
- ieee80211_free_chanctx(local, ctx);
- goto out;
+ err = ieee80211_chsw_switch_hwconf(local, new_ctx);
+ if (err)
+ goto err;
+ }
+
+ /*
+ * Update all structures, values and pointers to point to new channel
+ * context(s).
+ */
+
+ i = 0;
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ continue;
+
+ if (WARN_ON(!ctx->replace_ctx)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ list_for_each_entry(sdata, &ctx->reserved_vifs,
+ reserved_chanctx_list) {
+ u32 changed = 0;
+
+ if (!ieee80211_vif_has_in_place_reservation(sdata))
+ continue;
+
+ rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ __ieee80211_vif_copy_chanctx_to_vlans(sdata,
+ false);
+
+ sdata->radar_required = sdata->reserved_radar_required;
+
+ if (sdata->vif.bss_conf.chandef.width !=
+ sdata->reserved_chandef.width)
+ changed = BSS_CHANGED_BANDWIDTH;
+
+ sdata->vif.bss_conf.chandef = sdata->reserved_chandef;
+ if (changed)
+ ieee80211_bss_info_change_notify(sdata,
+ changed);
+
+ ieee80211_recalc_txpower(sdata);
+ }
+
+ ieee80211_recalc_chanctx_chantype(local, ctx);
+ ieee80211_recalc_smps_chanctx(local, ctx);
+ ieee80211_recalc_radar_chanctx(local, ctx);
+ ieee80211_recalc_chanctx_min_def(local, ctx);
+
+ list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
+ reserved_chanctx_list) {
+ if (ieee80211_vif_get_chanctx(sdata) != ctx)
+ continue;
+
+ list_del(&sdata->reserved_chanctx_list);
+ list_move(&sdata->assigned_chanctx_list,
+ &ctx->assigned_vifs);
+ sdata->reserved_chanctx = NULL;
+
+ ieee80211_vif_chanctx_reservation_complete(sdata);
}
- if (sdata->vif.type == NL80211_IFTYPE_AP)
- __ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+ /*
+ * This context might have been a dependency for an already
+ * ready re-assign reservation interface that was deferred. Do
+ * not propagate error to the caller though. The in-place
+ * reservation for originally requested interface has already
+ * succeeded at this point.
+ */
+ list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
+ reserved_chanctx_list) {
+ if (WARN_ON(ieee80211_vif_has_in_place_reservation(
+ sdata)))
+ continue;
+
+ if (WARN_ON(sdata->reserved_chanctx != ctx))
+ continue;
+
+ if (!sdata->reserved_ready)
+ continue;
+
+ if (ieee80211_vif_get_chanctx(sdata))
+ err = ieee80211_vif_use_reserved_reassign(
+ sdata);
+ else
+ err = ieee80211_vif_use_reserved_assign(sdata);
+
+ if (err) {
+ sdata_info(sdata,
+ "failed to finalize (re-)assign reservation (err=%d)\n",
+ err);
+ ieee80211_vif_unreserve_chanctx(sdata);
+ cfg80211_stop_iface(local->hw.wiphy,
+ &sdata->wdev,
+ GFP_KERNEL);
+ }
+ }
}
- *changed = tmp_changed;
+ /*
+ * Finally free old contexts
+ */
+
+ list_for_each_entry_safe(ctx, ctx_tmp, &local->chanctx_list, list) {
+ if (ctx->replace_state != IEEE80211_CHANCTX_WILL_BE_REPLACED)
+ continue;
+
+ ctx->replace_ctx->replace_ctx = NULL;
+ ctx->replace_ctx->replace_state =
+ IEEE80211_CHANCTX_REPLACE_NONE;
+
+ list_del_rcu(&ctx->list);
+ kfree_rcu(ctx, rcu_head);
+ }
+
+ return 0;
+
+err:
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
+ continue;
+
+ list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
+ reserved_chanctx_list) {
+ ieee80211_vif_unreserve_chanctx(sdata);
+ ieee80211_vif_chanctx_reservation_complete(sdata);
+ }
+ }
+
+ return err;
+}
+
+static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *conf;
+ struct ieee80211_chanctx *ctx;
+ bool use_reserved_switch = false;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (!conf)
+ return;
+
+ ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+ if (sdata->reserved_chanctx) {
+ if (sdata->reserved_chanctx->replace_state ==
+ IEEE80211_CHANCTX_REPLACES_OTHER &&
+ ieee80211_chanctx_num_reserved(local,
+ sdata->reserved_chanctx) > 1)
+ use_reserved_switch = true;
+
+ ieee80211_vif_unreserve_chanctx(sdata);
+ }
+
+ ieee80211_assign_vif_chanctx(sdata, NULL);
+ if (ieee80211_chanctx_refcount(local, ctx) == 0)
+ ieee80211_free_chanctx(local, ctx);
+
+ /* Unreserving may ready an in-place reservation. */
+ if (use_reserved_switch)
+ ieee80211_vif_use_reserved_switch(local);
+}
+
+int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx *ctx;
+ u8 radar_detect_width = 0;
+ int ret;
+
+ lockdep_assert_held(&local->mtx);
+
+ WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
+
+ mutex_lock(&local->chanctx_mtx);
+
+ ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+ chandef,
+ sdata->wdev.iftype);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+ radar_detect_width = BIT(chandef->width);
+
+ sdata->radar_required = ret;
+
+ ret = ieee80211_check_combinations(sdata, chandef, mode,
+ radar_detect_width);
+ if (ret < 0)
+ goto out;
+
+ __ieee80211_vif_release_channel(sdata);
+
+ ctx = ieee80211_find_chanctx(local, chandef, mode);
+ if (!ctx)
+ ctx = ieee80211_new_chanctx(local, chandef, mode);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ goto out;
+ }
+
+ sdata->vif.bss_conf.chandef = *chandef;
+
+ ret = ieee80211_assign_vif_chanctx(sdata, ctx);
+ if (ret) {
+ /* if assign fails refcount stays the same */
+ if (ieee80211_chanctx_refcount(local, ctx) == 0)
+ ieee80211_free_chanctx(local, ctx);
+ goto out;
+ }
- ieee80211_recalc_chanctx_chantype(local, ctx);
ieee80211_recalc_smps_chanctx(local, ctx);
ieee80211_recalc_radar_chanctx(local, ctx);
- ieee80211_recalc_chanctx_min_def(local, ctx);
-out:
+ out:
mutex_unlock(&local->chanctx_mtx);
return ret;
}
+int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx *new_ctx;
+ struct ieee80211_chanctx *old_ctx;
+ int err;
+
+ lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ new_ctx = sdata->reserved_chanctx;
+ old_ctx = ieee80211_vif_get_chanctx(sdata);
+
+ if (WARN_ON(!new_ctx))
+ return -EINVAL;
+
+ if (WARN_ON(new_ctx->replace_state ==
+ IEEE80211_CHANCTX_WILL_BE_REPLACED))
+ return -EINVAL;
+
+ if (WARN_ON(sdata->reserved_ready))
+ return -EINVAL;
+
+ sdata->reserved_ready = true;
+
+ if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) {
+ if (old_ctx)
+ err = ieee80211_vif_use_reserved_reassign(sdata);
+ else
+ err = ieee80211_vif_use_reserved_assign(sdata);
+
+ if (err)
+ return err;
+ }
+
+ /*
+ * In-place reservation may need to be finalized now either if:
+ * a) sdata is taking part in the swapping itself and is the last one
+ * b) sdata has switched with a re-assign reservation to an existing
+ * context readying in-place switching of old_ctx
+ *
+ * In case of (b) do not propagate the error up because the requested
+ * sdata already switched successfully. Just spill an extra warning.
+ * The ieee80211_vif_use_reserved_switch() already stops all necessary
+ * interfaces upon failure.
+ */
+ if ((old_ctx &&
+ old_ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
+ new_ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) {
+ err = ieee80211_vif_use_reserved_switch(local);
+ if (err && err != -EAGAIN) {
+ if (new_ctx->replace_state ==
+ IEEE80211_CHANCTX_REPLACES_OTHER)
+ return err;
+
+ wiphy_info(local->hw.wiphy,
+ "depending in-place reservation failed (err=%d)\n",
+ err);
+ }
+ }
+
+ return 0;
+}
+
int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
u32 *changed)
@@ -1043,6 +1592,7 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
+ const struct cfg80211_chan_def *compat;
int ret;
if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
@@ -1069,11 +1619,33 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
}
ctx = container_of(conf, struct ieee80211_chanctx, conf);
- if (!cfg80211_chandef_compatible(&conf->def, chandef)) {
+
+ compat = cfg80211_chandef_compatible(&conf->def, chandef);
+ if (!compat) {
ret = -EINVAL;
goto out;
}
+ switch (ctx->replace_state) {
+ case IEEE80211_CHANCTX_REPLACE_NONE:
+ if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ break;
+ case IEEE80211_CHANCTX_WILL_BE_REPLACED:
+ /* TODO: Perhaps the bandwith change could be treated as a
+ * reservation itself? */
+ ret = -EBUSY;
+ goto out;
+ case IEEE80211_CHANCTX_REPLACES_OTHER:
+ /* channel context that is going to replace another channel
+ * context doesn't really exist and shouldn't be assigned
+ * anywhere yet */
+ WARN_ON(1);
+ break;
+ }
+
sdata->vif.bss_conf.chandef = *chandef;
ieee80211_recalc_chanctx_chantype(local, ctx);
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 0e963bc1ceac..54a189f0393e 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -3,6 +3,7 @@
* mac80211 debugfs for wireless PHYs
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* GPLv2
*
@@ -302,11 +303,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n");
if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE)
sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n");
- if (local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS)
- sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_STATIC_SMPS\n");
- if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS)
- sf += scnprintf(buf + sf, mxln - sf,
- "SUPPORTS_DYNAMIC_SMPS\n");
if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)
sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n");
if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index e205ebabfa50..c68896adfa96 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -226,12 +226,12 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
int err;
- if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS) &&
+ if (!(local->hw.wiphy->features & NL80211_FEATURE_STATIC_SMPS) &&
smps_mode == IEEE80211_SMPS_STATIC)
return -EINVAL;
/* auto should be dynamic if in PS mode */
- if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) &&
+ if (!(local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS) &&
(smps_mode == IEEE80211_SMPS_DYNAMIC ||
smps_mode == IEEE80211_SMPS_AUTOMATIC))
return -EINVAL;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 2ecb4deddb5d..bafe48916229 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -2,6 +2,7 @@
* Copyright 2003-2005 Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -77,7 +78,8 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
TEST(AUTH), TEST(ASSOC), TEST(PS_STA),
TEST(PS_DRIVER), TEST(AUTHORIZED),
TEST(SHORT_PREAMBLE),
- TEST(WME), TEST(WDS), TEST(CLEAR_PS_FILT),
+ sta->sta.wme ? "WME\n" : "",
+ TEST(WDS), TEST(CLEAR_PS_FILT),
TEST(MFP), TEST(BLOCK_BA), TEST(PSPOLL),
TEST(UAPSD), TEST(SP), TEST(TDLS_PEER),
TEST(TDLS_PEER_AUTH), TEST(4ADDR_EVENT),
@@ -124,7 +126,7 @@ static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf,
long connected_time_secs;
char buf[100];
int res;
- do_posix_clock_monotonic_gettime(&uptime);
+ ktime_get_ts(&uptime);
connected_time_secs = uptime.tv_sec - sta->last_connected;
time_to_tm(connected_time_secs, 0, &result);
result.tm_year -= 70;
@@ -167,7 +169,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n",
sta->ampdu_mlme.dialog_token_allocator + 1);
p += scnprintf(p, sizeof(buf) + buf - p,
- "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
+ "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
@@ -587,7 +589,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count);
DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed);
DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
- DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count);
if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
debugfs_create_x32("driver_buffered_tids", 0400,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index bd782dcffcc7..196d48c68134 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -314,7 +314,7 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local,
static inline int drv_hw_scan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- struct cfg80211_scan_request *req)
+ struct ieee80211_scan_request *req)
{
int ret;
@@ -346,7 +346,7 @@ static inline int
drv_sched_scan_start(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct cfg80211_sched_scan_request *req,
- struct ieee80211_sched_scan_ies *ies)
+ struct ieee80211_scan_ies *ies)
{
int ret;
@@ -450,7 +450,7 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local,
}
static inline int drv_set_coverage_class(struct ieee80211_local *local,
- u8 value)
+ s16 value)
{
int ret = 0;
might_sleep();
@@ -970,6 +970,22 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline void
+drv_mgd_protect_tdls_discover(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+ WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
+
+ trace_drv_mgd_protect_tdls_discover(local, sdata);
+ if (local->ops->mgd_protect_tdls_discover)
+ local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif);
+ trace_drv_return_void(local);
+}
+
static inline int drv_add_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
new file mode 100644
index 000000000000..ebfc8091557b
--- /dev/null
+++ b/net/mac80211/ethtool.c
@@ -0,0 +1,244 @@
+/*
+ * mac80211 ethtool hooks for cfg80211
+ *
+ * Copied from cfg.c - originally
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2014 Intel Corporation (Author: Johannes Berg)
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+#include <linux/types.h>
+#include <net/cfg80211.h>
+#include "ieee80211_i.h"
+#include "sta_info.h"
+#include "driver-ops.h"
+
+static int ieee80211_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *rp)
+{
+ struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy);
+
+ if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0)
+ return -EINVAL;
+
+ return drv_set_ringparam(local, rp->tx_pending, rp->rx_pending);
+}
+
+static void ieee80211_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *rp)
+{
+ struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy);
+
+ memset(rp, 0, sizeof(*rp));
+
+ drv_get_ringparam(local, &rp->tx_pending, &rp->tx_max_pending,
+ &rp->rx_pending, &rp->rx_max_pending);
+}
+
+static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets", "rx_bytes",
+ "rx_duplicates", "rx_fragments", "rx_dropped",
+ "tx_packets", "tx_bytes", "tx_fragments",
+ "tx_filtered", "tx_retry_failed", "tx_retries",
+ "beacon_loss", "sta_state", "txrate", "rxrate", "signal",
+ "channel", "noise", "ch_time", "ch_time_busy",
+ "ch_time_ext_busy", "ch_time_rx", "ch_time_tx"
+};
+#define STA_STATS_LEN ARRAY_SIZE(ieee80211_gstrings_sta_stats)
+
+static int ieee80211_get_sset_count(struct net_device *dev, int sset)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ int rv = 0;
+
+ if (sset == ETH_SS_STATS)
+ rv += STA_STATS_LEN;
+
+ rv += drv_get_et_sset_count(sdata, sset);
+
+ if (rv == 0)
+ return -EOPNOTSUPP;
+ return rv;
+}
+
+static void ieee80211_get_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ struct ieee80211_channel *channel;
+ struct sta_info *sta;
+ struct ieee80211_local *local = sdata->local;
+ struct station_info sinfo;
+ struct survey_info survey;
+ int i, q;
+#define STA_STATS_SURVEY_LEN 7
+
+ memset(data, 0, sizeof(u64) * STA_STATS_LEN);
+
+#define ADD_STA_STATS(sta) \
+ do { \
+ data[i++] += sta->rx_packets; \
+ data[i++] += sta->rx_bytes; \
+ data[i++] += sta->num_duplicates; \
+ data[i++] += sta->rx_fragments; \
+ data[i++] += sta->rx_dropped; \
+ \
+ data[i++] += sinfo.tx_packets; \
+ data[i++] += sinfo.tx_bytes; \
+ data[i++] += sta->tx_fragments; \
+ data[i++] += sta->tx_filtered_count; \
+ data[i++] += sta->tx_retry_failed; \
+ data[i++] += sta->tx_retry_count; \
+ data[i++] += sta->beacon_loss_count; \
+ } while (0)
+
+ /* For Managed stations, find the single station based on BSSID
+ * and use that. For interface types, iterate through all available
+ * stations and add stats for any station that is assigned to this
+ * network device.
+ */
+
+ mutex_lock(&local->sta_mtx);
+
+ if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+ sta = sta_info_get_bss(sdata, sdata->u.mgd.bssid);
+
+ if (!(sta && !WARN_ON(sta->sdata->dev != dev)))
+ goto do_survey;
+
+ sinfo.filled = 0;
+ sta_set_sinfo(sta, &sinfo);
+
+ i = 0;
+ ADD_STA_STATS(sta);
+
+ data[i++] = sta->sta_state;
+
+
+ if (sinfo.filled & STATION_INFO_TX_BITRATE)
+ data[i] = 100000 *
+ cfg80211_calculate_bitrate(&sinfo.txrate);
+ i++;
+ if (sinfo.filled & STATION_INFO_RX_BITRATE)
+ data[i] = 100000 *
+ cfg80211_calculate_bitrate(&sinfo.rxrate);
+ i++;
+
+ if (sinfo.filled & STATION_INFO_SIGNAL_AVG)
+ data[i] = (u8)sinfo.signal_avg;
+ i++;
+ } else {
+ list_for_each_entry(sta, &local->sta_list, list) {
+ /* Make sure this station belongs to the proper dev */
+ if (sta->sdata->dev != dev)
+ continue;
+
+ sinfo.filled = 0;
+ sta_set_sinfo(sta, &sinfo);
+ i = 0;
+ ADD_STA_STATS(sta);
+ }
+ }
+
+do_survey:
+ i = STA_STATS_LEN - STA_STATS_SURVEY_LEN;
+ /* Get survey stats for current channel */
+ survey.filled = 0;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (chanctx_conf)
+ channel = chanctx_conf->def.chan;
+ else
+ channel = NULL;
+ rcu_read_unlock();
+
+ if (channel) {
+ q = 0;
+ do {
+ survey.filled = 0;
+ if (drv_get_survey(local, q, &survey) != 0) {
+ survey.filled = 0;
+ break;
+ }
+ q++;
+ } while (channel != survey.channel);
+ }
+
+ if (survey.filled)
+ data[i++] = survey.channel->center_freq;
+ else
+ data[i++] = 0;
+ if (survey.filled & SURVEY_INFO_NOISE_DBM)
+ data[i++] = (u8)survey.noise;
+ else
+ data[i++] = -1LL;
+ if (survey.filled & SURVEY_INFO_CHANNEL_TIME)
+ data[i++] = survey.channel_time;
+ else
+ data[i++] = -1LL;
+ if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY)
+ data[i++] = survey.channel_time_busy;
+ else
+ data[i++] = -1LL;
+ if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY)
+ data[i++] = survey.channel_time_ext_busy;
+ else
+ data[i++] = -1LL;
+ if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX)
+ data[i++] = survey.channel_time_rx;
+ else
+ data[i++] = -1LL;
+ if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX)
+ data[i++] = survey.channel_time_tx;
+ else
+ data[i++] = -1LL;
+
+ mutex_unlock(&local->sta_mtx);
+
+ if (WARN_ON(i != STA_STATS_LEN))
+ return;
+
+ drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN]));
+}
+
+static void ieee80211_get_strings(struct net_device *dev, u32 sset, u8 *data)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ int sz_sta_stats = 0;
+
+ if (sset == ETH_SS_STATS) {
+ sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats);
+ memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats);
+ }
+ drv_get_et_strings(sdata, sset, &(data[sz_sta_stats]));
+}
+
+static int ieee80211_get_regs_len(struct net_device *dev)
+{
+ return 0;
+}
+
+static void ieee80211_get_regs(struct net_device *dev,
+ struct ethtool_regs *regs,
+ void *data)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+ regs->version = wdev->wiphy->hw_version;
+ regs->len = 0;
+}
+
+const struct ethtool_ops ieee80211_ethtool_ops = {
+ .get_drvinfo = cfg80211_get_drvinfo,
+ .get_regs_len = ieee80211_get_regs_len,
+ .get_regs = ieee80211_get_regs,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = ieee80211_get_ringparam,
+ .set_ringparam = ieee80211_set_ringparam,
+ .get_strings = ieee80211_get_strings,
+ .get_ethtool_stats = ieee80211_get_stats,
+ .get_sset_count = ieee80211_get_sset_count,
+};
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 15702ff64a4c..ff630be2ca75 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -150,13 +150,12 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
/*
* If user has specified capability over-rides, take care
- * of that if the station we're setting up is the AP that
+ * of that if the station we're setting up is the AP or TDLS peer that
* we advertised a restricted capability set to. Override
* our own capabilities and then use those below.
*/
- if ((sdata->vif.type == NL80211_IFTYPE_STATION ||
- sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
- !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ if (sdata->vif.type == NL80211_IFTYPE_STATION ||
+ sdata->vif.type == NL80211_IFTYPE_ADHOC)
ieee80211_apply_htcap_overrides(sdata, &own_cap);
/*
@@ -228,6 +227,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)
ht_cap.mcs.rx_mask[32/8] |= 1;
+ /* set Rx highest rate */
+ ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest;
+
apply:
changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 18ee0a256b1e..56b53571c807 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -6,6 +6,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -143,7 +144,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
*pos++ = csa_settings->block_tx ? 1 : 0;
*pos++ = ieee80211_frequency_to_channel(
csa_settings->chandef.chan->center_freq);
- sdata->csa_counter_offset_beacon[0] = (pos - presp->head);
+ presp->csa_counter_offsets[0] = (pos - presp->head);
*pos++ = csa_settings->count;
}
@@ -189,17 +190,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
chandef, 0);
}
- if (local->hw.queues >= IEEE80211_NUM_ACS) {
- *pos++ = WLAN_EID_VENDOR_SPECIFIC;
- *pos++ = 7; /* len */
- *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
- *pos++ = 0x50;
- *pos++ = 0xf2;
- *pos++ = 2; /* WME */
- *pos++ = 0; /* WME info */
- *pos++ = 1; /* WME ver */
- *pos++ = 0; /* U-APSD no in use */
- }
+ if (local->hw.queues >= IEEE80211_NUM_ACS)
+ pos = ieee80211_add_wmm_info_ie(pos, 0); /* U-APSD not in use */
presp->head_len = pos - presp->head;
if (WARN_ON(presp->head_len > frame_len))
@@ -1047,7 +1039,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
}
if (sta && elems->wmm_info)
- set_sta_flag(sta, WLAN_STA_WME);
+ sta->sta.wme = true;
if (sta && elems->ht_operation && elems->ht_cap_elem &&
sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ac9836e0aab3..c2aaec4dfcf0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -3,6 +3,7 @@
* Copyright 2005, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -229,16 +230,29 @@ struct ieee80211_rx_data {
u16 tkip_iv16;
};
+struct ieee80211_csa_settings {
+ const u16 *counter_offsets_beacon;
+ const u16 *counter_offsets_presp;
+
+ int n_counter_offsets_beacon;
+ int n_counter_offsets_presp;
+
+ u8 count;
+};
+
struct beacon_data {
u8 *head, *tail;
int head_len, tail_len;
struct ieee80211_meshconf_ie *meshconf;
+ u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM];
+ u8 csa_current_counter;
struct rcu_head rcu_head;
};
struct probe_resp {
struct rcu_head rcu_head;
int len;
+ u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM];
u8 data[0];
};
@@ -332,7 +346,6 @@ enum ieee80211_sta_flags {
IEEE80211_STA_CONNECTION_POLL = BIT(1),
IEEE80211_STA_CONTROL_PORT = BIT(2),
IEEE80211_STA_DISABLE_HT = BIT(4),
- IEEE80211_STA_CSA_RECEIVED = BIT(5),
IEEE80211_STA_MFP_ENABLED = BIT(6),
IEEE80211_STA_UAPSD_ENABLED = BIT(7),
IEEE80211_STA_NULLFUNC_ACKED = BIT(8),
@@ -342,6 +355,7 @@ enum ieee80211_sta_flags {
IEEE80211_STA_DISABLE_80P80MHZ = BIT(12),
IEEE80211_STA_DISABLE_160MHZ = BIT(13),
IEEE80211_STA_DISABLE_WMM = BIT(14),
+ IEEE80211_STA_ENABLE_RRM = BIT(15),
};
struct ieee80211_mgd_auth_data {
@@ -490,6 +504,9 @@ struct ieee80211_if_managed {
struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */
struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */
+
+ u8 tdls_peer[ETH_ALEN] __aligned(2);
+ struct delayed_work tdls_peer_del_work;
};
struct ieee80211_if_ibss {
@@ -688,6 +705,24 @@ enum ieee80211_chanctx_mode {
IEEE80211_CHANCTX_EXCLUSIVE
};
+/**
+ * enum ieee80211_chanctx_replace_state - channel context replacement state
+ *
+ * This is used for channel context in-place reservations that require channel
+ * context switch/swap.
+ *
+ * @IEEE80211_CHANCTX_REPLACE_NONE: no replacement is taking place
+ * @IEEE80211_CHANCTX_WILL_BE_REPLACED: this channel context will be replaced
+ * by a (not yet registered) channel context pointed by %replace_ctx.
+ * @IEEE80211_CHANCTX_REPLACES_OTHER: this (not yet registered) channel context
+ * replaces an existing channel context pointed to by %replace_ctx.
+ */
+enum ieee80211_chanctx_replace_state {
+ IEEE80211_CHANCTX_REPLACE_NONE,
+ IEEE80211_CHANCTX_WILL_BE_REPLACED,
+ IEEE80211_CHANCTX_REPLACES_OTHER,
+};
+
struct ieee80211_chanctx {
struct list_head list;
struct rcu_head rcu_head;
@@ -695,6 +730,9 @@ struct ieee80211_chanctx {
struct list_head assigned_vifs;
struct list_head reserved_vifs;
+ enum ieee80211_chanctx_replace_state replace_state;
+ struct ieee80211_chanctx *replace_ctx;
+
enum ieee80211_chanctx_mode mode;
bool driver_present;
@@ -754,9 +792,6 @@ struct ieee80211_sub_if_data {
struct mac80211_qos_map __rcu *qos_map;
struct work_struct csa_finalize_work;
- u16 csa_counter_offset_beacon[IEEE80211_MAX_CSA_COUNTERS_NUM];
- u16 csa_counter_offset_presp[IEEE80211_MAX_CSA_COUNTERS_NUM];
- bool csa_radar_required;
bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
struct cfg80211_chan_def csa_chandef;
@@ -767,7 +802,7 @@ struct ieee80211_sub_if_data {
struct ieee80211_chanctx *reserved_chanctx;
struct cfg80211_chan_def reserved_chandef;
bool reserved_radar_required;
- u8 csa_current_counter;
+ bool reserved_ready;
/* used to reconfigure hardware SM PS */
struct work_struct recalc_smps;
@@ -892,10 +927,17 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif)
return shift;
}
+struct ieee80211_rx_agg {
+ u8 addr[ETH_ALEN];
+ u16 tid;
+};
+
enum sdata_queue_type {
IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0,
IEEE80211_SDATA_QUEUE_AGG_START = 1,
IEEE80211_SDATA_QUEUE_AGG_STOP = 2,
+ IEEE80211_SDATA_QUEUE_RX_AGG_START = 3,
+ IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4,
};
enum {
@@ -912,6 +954,9 @@ enum queue_stop_reason {
IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
IEEE80211_QUEUE_STOP_REASON_FLUSH,
+ IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN,
+
+ IEEE80211_QUEUE_STOP_REASONS,
};
#ifdef CONFIG_MAC80211_LEDS
@@ -1008,6 +1053,7 @@ struct ieee80211_local {
struct workqueue_struct *workqueue;
unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES];
+ int q_stop_reasons[IEEE80211_MAX_QUEUES][IEEE80211_QUEUE_STOP_REASONS];
/* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */
spinlock_t queue_stop_reason_lock;
@@ -1135,7 +1181,8 @@ struct ieee80211_local {
unsigned long scanning;
struct cfg80211_ssid scan_ssid;
struct cfg80211_scan_request *int_scan_req;
- struct cfg80211_scan_request *scan_req, *hw_scan_req;
+ struct cfg80211_scan_request *scan_req;
+ struct ieee80211_scan_request *hw_scan_req;
struct cfg80211_chan_def scan_chandef;
enum ieee80211_band hw_scan_band;
int scan_channel_idx;
@@ -1322,6 +1369,7 @@ struct ieee802_11_elems {
const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
const u8 *country_elem;
const u8 *pwr_constr_elem;
+ const u8 *cisco_dtpc_elem;
const struct ieee80211_timeout_interval_ie *timeout_int;
const u8 *opmode_notif;
const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
@@ -1476,7 +1524,6 @@ void ieee80211_sw_roc_work(struct work_struct *work);
void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
/* channel switch handling */
-bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local);
void ieee80211_csa_finalize_work(struct work_struct *work);
int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params);
@@ -1540,6 +1587,10 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
u16 initiator, u16 reason, bool stop);
void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
u16 initiator, u16 reason, bool stop);
+void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy, u16 tid,
+ u16 buf_size, bool tx, bool auto_seq);
void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
enum ieee80211_agg_stop_reason reason);
void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -1692,6 +1743,21 @@ static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0);
}
+static inline bool ieee80211_rx_reorder_ready(struct sk_buff_head *frames)
+{
+ struct sk_buff *tail = skb_peek_tail(frames);
+ struct ieee80211_rx_status *status;
+
+ if (!tail)
+ return false;
+
+ status = IEEE80211_SKB_RXCB(tail);
+ if (status->flag & RX_FLAG_AMSDU_MORE)
+ return false;
+
+ return true;
+}
+
void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
void ieee80211_dynamic_ps_timer(unsigned long data);
@@ -1705,14 +1771,24 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
unsigned long queues,
- enum queue_stop_reason reason);
+ enum queue_stop_reason reason,
+ bool refcounted);
+void ieee80211_stop_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason);
+void ieee80211_wake_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason);
void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
unsigned long queues,
- enum queue_stop_reason reason);
+ enum queue_stop_reason reason,
+ bool refcounted);
void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
- enum queue_stop_reason reason);
+ enum queue_stop_reason reason,
+ bool refcounted);
void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
- enum queue_stop_reason reason);
+ enum queue_stop_reason reason,
+ bool refcounted);
void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue);
void ieee80211_add_pending_skb(struct ieee80211_local *local,
struct sk_buff *skb);
@@ -1730,8 +1806,10 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
const u8 *bssid, u16 stype, u16 reason,
bool send_frame, u8 *frame_buf);
int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
- size_t buffer_len, const u8 *ie, size_t ie_len,
- enum ieee80211_band band, u32 rate_mask,
+ size_t buffer_len,
+ struct ieee80211_scan_ies *ie_desc,
+ const u8 *ie, size_t ie_len,
+ u8 bands_used, u32 *rate_masks,
struct cfg80211_chan_def *chandef);
struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
u8 *dst, u32 ratemask,
@@ -1774,6 +1852,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, bool need_basic,
enum ieee80211_band band);
+u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
/* channel management */
void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
@@ -1791,18 +1870,13 @@ ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
enum ieee80211_chanctx_mode mode,
bool radar_required);
int __must_check
-ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
- u32 *changed);
+ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata);
int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata);
int __must_check
ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
u32 *changed);
-/* NOTE: only use ieee80211_vif_change_channel() for channel switch */
-int __must_check
-ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
- u32 *changed);
void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata);
void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
@@ -1842,10 +1916,13 @@ int ieee80211_max_num_channels(struct ieee80211_local *local);
int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, u8 action_code, u8 dialog_token,
u16 status_code, u32 peer_capability,
- const u8 *extra_ies, size_t extra_ies_len);
+ bool initiator, const u8 *extra_ies,
+ size_t extra_ies_len);
int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, enum nl80211_tdls_operation oper);
+void ieee80211_tdls_peer_del_work(struct work_struct *wk);
+extern const struct ethtool_ops ieee80211_ethtool_ops;
#ifdef CONFIG_MAC80211_NOINLINE
#define debug_noinline noinline
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 388b863e821c..af237223a8cd 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -5,6 +5,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -841,10 +842,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
sdata_lock(sdata);
mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
- if (!ieee80211_csa_needs_block_tx(local))
- ieee80211_wake_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ if (sdata->csa_block_tx) {
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_block_tx = false;
+ }
mutex_unlock(&local->mtx);
sdata_unlock(sdata);
@@ -1139,6 +1141,7 @@ static void ieee80211_iface_work(struct work_struct *work)
struct sk_buff *skb;
struct sta_info *sta;
struct ieee80211_ra_tid *ra_tid;
+ struct ieee80211_rx_agg *rx_agg;
if (!ieee80211_sdata_running(sdata))
return;
@@ -1166,6 +1169,26 @@ static void ieee80211_iface_work(struct work_struct *work)
ra_tid = (void *)&skb->cb;
ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra,
ra_tid->tid);
+ } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
+ rx_agg = (void *)&skb->cb;
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, rx_agg->addr);
+ if (sta)
+ __ieee80211_start_rx_ba_session(sta,
+ 0, 0, 0, 1, rx_agg->tid,
+ IEEE80211_MAX_AMPDU_BUF,
+ false, true);
+ mutex_unlock(&local->sta_mtx);
+ } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_STOP) {
+ rx_agg = (void *)&skb->cb;
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, rx_agg->addr);
+ if (sta)
+ __ieee80211_stop_rx_ba_session(sta,
+ rx_agg->tid,
+ WLAN_BACK_RECIPIENT, 0,
+ false);
+ mutex_unlock(&local->sta_mtx);
} else if (ieee80211_is_action(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_BACK) {
int len = skb->len;
@@ -1623,9 +1646,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
if (local->hw.queues >= IEEE80211_NUM_ACS)
txqs = IEEE80211_NUM_ACS;
- ndev = alloc_netdev_mqs(sizeof(*sdata) +
- local->hw.vif_data_size,
- name, ieee80211_if_setup, txqs, 1);
+ ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size,
+ name, NET_NAME_UNKNOWN,
+ ieee80211_if_setup, txqs, 1);
if (!ndev)
return -ENOMEM;
dev_net_set(ndev, wiphy_net(local->hw.wiphy));
@@ -1705,6 +1728,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ndev->features |= local->hw.netdev_features;
+ netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
+
ret = register_netdevice(ndev);
if (ret) {
free_netdev(ndev);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 16d97f044a20..4712150dc210 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -3,6 +3,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -130,9 +131,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
if (!ret) {
key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
- if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
- (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) ||
- (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)))
+ if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC))
sdata->crypto_tx_tailroom_needed_cnt--;
WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
@@ -180,9 +179,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
sta = key->sta;
sdata = key->sdata;
- if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
- (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) ||
- (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)))
+ if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC))
increment_tailroom_need_count(sdata);
ret = drv_set_key(key->local, DISABLE_KEY, sdata,
@@ -425,7 +422,7 @@ static void ieee80211_key_free_common(struct ieee80211_key *key)
ieee80211_aes_key_free(key->u.ccmp.tfm);
if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
- kfree(key);
+ kzfree(key);
}
static void __ieee80211_key_destroy(struct ieee80211_key *key,
@@ -482,9 +479,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
int idx, ret;
bool pairwise;
- if (WARN_ON(!sdata || !key))
- return -EINVAL;
-
pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
idx = key->conf.keyidx;
key->local = sdata->local;
@@ -881,9 +875,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf)
if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
- if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
- (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) ||
- (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)))
+ if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC))
increment_tailroom_need_count(key->sdata);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d17c26d6e369..0de7c93bf62b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -2,6 +2,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -272,7 +273,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
/* use this reason, ieee80211_reconfig will unblock it */
ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND,
+ false);
/*
* Stop all Rx during the reconfig. We don't want state changes
@@ -1187,18 +1189,12 @@ static int __init ieee80211_init(void)
if (ret)
goto err_minstrel;
- ret = rc80211_pid_init();
- if (ret)
- goto err_pid;
-
ret = ieee80211_iface_init();
if (ret)
goto err_netdev;
return 0;
err_netdev:
- rc80211_pid_exit();
- err_pid:
rc80211_minstrel_ht_exit();
err_minstrel:
rc80211_minstrel_exit();
@@ -1208,7 +1204,6 @@ static int __init ieee80211_init(void)
static void __exit ieee80211_exit(void)
{
- rc80211_pid_exit();
rc80211_minstrel_ht_exit();
rc80211_minstrel_exit();
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6495a3f0428d..e9f99c1e3fad 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -679,7 +679,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
*pos++ = 0x0;
*pos++ = ieee80211_frequency_to_channel(
csa->settings.chandef.chan->center_freq);
- sdata->csa_counter_offset_beacon[0] = hdr_len + 6;
+ bcn->csa_counter_offsets[0] = hdr_len + 6;
*pos++ = csa->settings.count;
*pos++ = WLAN_EID_CHAN_SWITCH_PARAM;
*pos++ = 6;
@@ -1122,7 +1122,7 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
mgmt_fwd = (struct ieee80211_mgmt *) skb_put(skb, len);
/* offset_ttl is based on whether the secondary channel
- * offset is available or not. Substract 1 from the mesh TTL
+ * offset is available or not. Subtract 1 from the mesh TTL
* and disable the initiator flag before forwarding.
*/
offset_ttl = (len < 42) ? 7 : 10;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 94758b9c9ed4..214e63b84e5c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -157,7 +157,6 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
default:
kfree_skb(skb);
return -ENOTSUPP;
- break;
}
*pos++ = ie_len;
*pos++ = flags;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index cf032a8db9d7..a6699dceae7c 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -729,7 +729,7 @@ void mesh_plink_broken(struct sta_info *sta)
tbl = rcu_dereference(mesh_paths);
for_each_mesh_entry(tbl, node, i) {
mpath = node->mpath;
- if (rcu_dereference(mpath->next_hop) == sta &&
+ if (rcu_access_pointer(mpath->next_hop) == sta &&
mpath->flags & MESH_PATH_ACTIVE &&
!(mpath->flags & MESH_PATH_FIXED)) {
spin_lock_bh(&mpath->state_lock);
@@ -794,7 +794,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
tbl = resize_dereference_mesh_paths();
for_each_mesh_entry(tbl, node, i) {
mpath = node->mpath;
- if (rcu_dereference(mpath->next_hop) == sta) {
+ if (rcu_access_pointer(mpath->next_hop) == sta) {
spin_lock(&tbl->hashwlock[i]);
__mesh_path_del(tbl, node);
spin_unlock(&tbl->hashwlock[i]);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index e8f60aa2e848..b488e1859b18 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -431,14 +431,12 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)
return NULL;
sta->plink_state = NL80211_PLINK_LISTEN;
+ sta->sta.wme = true;
sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED);
- set_sta_flag(sta, WLAN_STA_WME);
- sta->sta.wme = true;
-
return sta;
}
@@ -551,11 +549,30 @@ static void mesh_plink_timer(unsigned long data)
return;
spin_lock_bh(&sta->lock);
- if (sta->ignore_plink_timer) {
- sta->ignore_plink_timer = false;
+
+ /* If a timer fires just before a state transition on another CPU,
+ * we may have already extended the timeout and changed state by the
+ * time we've acquired the lock and arrived here. In that case,
+ * skip this timer and wait for the new one.
+ */
+ if (time_before(jiffies, sta->plink_timer.expires)) {
+ mpl_dbg(sta->sdata,
+ "Ignoring timer for %pM in state %s (timer adjusted)",
+ sta->sta.addr, mplstates[sta->plink_state]);
+ spin_unlock_bh(&sta->lock);
+ return;
+ }
+
+ /* del_timer() and handler may race when entering these states */
+ if (sta->plink_state == NL80211_PLINK_LISTEN ||
+ sta->plink_state == NL80211_PLINK_ESTAB) {
+ mpl_dbg(sta->sdata,
+ "Ignoring timer for %pM in state %s (timer deleted)",
+ sta->sta.addr, mplstates[sta->plink_state]);
spin_unlock_bh(&sta->lock);
return;
}
+
mpl_dbg(sta->sdata,
"Mesh plink timer for %pM fired on state %s\n",
sta->sta.addr, mplstates[sta->plink_state]);
@@ -773,9 +790,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
break;
case CNF_ACPT:
sta->plink_state = NL80211_PLINK_CNF_RCVD;
- if (!mod_plink_timer(sta,
- mshcfg->dot11MeshConfirmTimeout))
- sta->ignore_plink_timer = true;
+ mod_plink_timer(sta, mshcfg->dot11MeshConfirmTimeout);
break;
default:
break;
@@ -834,8 +849,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
case NL80211_PLINK_HOLDING:
switch (event) {
case CLS_ACPT:
- if (del_timer(&sta->plink_timer))
- sta->ignore_plink_timer = 1;
+ del_timer(&sta->plink_timer);
mesh_plink_fsm_restart(sta);
break;
case OPN_ACPT:
@@ -943,7 +957,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
if (!matches_local)
event = CNF_RJCT;
if (!mesh_plink_free_count(sdata) ||
- (sta->llid != llid || sta->plid != plid))
+ sta->llid != llid ||
+ (sta->plid && sta->plid != plid))
event = CNF_IGNR;
else
event = CNF_ACPT;
@@ -987,7 +1002,6 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
enum ieee80211_self_protected_actioncode ftype;
u32 changed = 0;
u8 ie_len = elems->peering_len;
- __le16 _plid, _llid;
u16 plid, llid = 0;
if (!elems->peering) {
@@ -1022,13 +1036,10 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
/* Note the lines below are correct, the llid in the frame is the plid
* from the point of view of this host.
*/
- memcpy(&_plid, PLINK_GET_LLID(elems->peering), sizeof(__le16));
- plid = le16_to_cpu(_plid);
+ plid = get_unaligned_le16(PLINK_GET_LLID(elems->peering));
if (ftype == WLAN_SP_MESH_PEERING_CONFIRM ||
- (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) {
- memcpy(&_llid, PLINK_GET_PLID(elems->peering), sizeof(__le16));
- llid = le16_to_cpu(_llid);
- }
+ (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8))
+ llid = get_unaligned_le16(PLINK_GET_PLID(elems->peering));
/* WARNING: Only for sta pointer, is dropped & re-acquired */
rcu_read_lock();
@@ -1064,6 +1075,10 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
goto unlock_rcu;
}
+ /* 802.11-2012 13.3.7.2 - update plid on CNF if not set */
+ if (!sta->plid && event == CNF_ACPT)
+ sta->plid = plid;
+
changed |= mesh_plink_fsm(sdata, sta, event);
unlock_rcu:
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3345401be1b3..2de88704278b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5,6 +5,7 @@
* Copyright 2005, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -149,6 +150,7 @@ static u32
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
+ const struct ieee80211_ht_cap *ht_cap,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
struct cfg80211_chan_def *chandef, bool tracking)
@@ -162,13 +164,19 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
chandef->center_freq1 = channel->center_freq;
chandef->center_freq2 = 0;
- if (!ht_oper || !sband->ht_cap.ht_supported) {
+ if (!ht_cap || !ht_oper || !sband->ht_cap.ht_supported) {
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
goto out;
}
chandef->width = NL80211_CHAN_WIDTH_20;
+ if (!(ht_cap->cap_info &
+ cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
+ ret = IEEE80211_STA_DISABLE_40MHZ;
+ goto out;
+ }
+
ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
channel->band);
/* check that channel matches the right operating channel */
@@ -328,6 +336,7 @@ out:
static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
+ const struct ieee80211_ht_cap *ht_cap,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
const u8 *bssid, u32 *changed)
@@ -367,8 +376,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[chan->band];
/* calculate new channel (type) based on HT/VHT operation IEs */
- flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper,
- vht_oper, &chandef, true);
+ flags = ieee80211_determine_chantype(sdata, sband, chan,
+ ht_cap, ht_oper, vht_oper,
+ &chandef, true);
/*
* Downgrade the new channel if we associated with restricted
@@ -663,6 +673,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
(local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
+ if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM)
+ capab |= WLAN_CAPABILITY_RADIO_MEASURE;
+
mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
memset(mgmt, 0, 24);
memcpy(mgmt->da, assoc_data->bss->bssid, ETH_ALEN);
@@ -728,16 +741,17 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
}
}
- if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) {
- /* 1. power capabilities */
+ if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT ||
+ capab & WLAN_CAPABILITY_RADIO_MEASURE) {
pos = skb_put(skb, 4);
*pos++ = WLAN_EID_PWR_CAPABILITY;
*pos++ = 2;
*pos++ = 0; /* min tx power */
/* max tx power */
*pos++ = ieee80211_chandef_max_power(&chanctx_conf->def);
+ }
- /* 2. supported channels */
+ if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) {
/* TODO: get this in reg domain format */
pos = skb_put(skb, 2 * sband->n_channels + 2);
*pos++ = WLAN_EID_SUPPORTED_CHANNELS;
@@ -830,16 +844,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
qos_info = 0;
}
- pos = skb_put(skb, 9);
- *pos++ = WLAN_EID_VENDOR_SPECIFIC;
- *pos++ = 7; /* len */
- *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
- *pos++ = 0x50;
- *pos++ = 0xf2;
- *pos++ = 2; /* WME */
- *pos++ = 0; /* WME info */
- *pos++ = 1; /* WME ver */
- *pos++ = qos_info;
+ pos = ieee80211_add_wmm_info_ie(skb_put(skb, 9), qos_info);
}
/* add any remaining custom (i.e. vendor specific here) IEs */
@@ -940,58 +945,77 @@ static void ieee80211_chswitch_work(struct work_struct *work)
container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work);
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u32 changed = 0;
int ret;
if (!ieee80211_sdata_running(sdata))
return;
sdata_lock(sdata);
+ mutex_lock(&local->mtx);
+ mutex_lock(&local->chanctx_mtx);
+
if (!ifmgd->associated)
goto out;
- mutex_lock(&local->mtx);
- ret = ieee80211_vif_change_channel(sdata, &changed);
- mutex_unlock(&local->mtx);
- if (ret) {
+ if (!sdata->vif.csa_active)
+ goto out;
+
+ /*
+ * using reservation isn't immediate as it may be deferred until later
+ * with multi-vif. once reservation is complete it will re-schedule the
+ * work with no reserved_chanctx so verify chandef to check if it
+ * completed successfully
+ */
+
+ if (sdata->reserved_chanctx) {
+ /*
+ * with multi-vif csa driver may call ieee80211_csa_finish()
+ * many times while waiting for other interfaces to use their
+ * reservations
+ */
+ if (sdata->reserved_ready)
+ goto out;
+
+ ret = ieee80211_vif_use_reserved_context(sdata);
+ if (ret) {
+ sdata_info(sdata,
+ "failed to use reserved channel context, disconnecting (err=%d)\n",
+ ret);
+ ieee80211_queue_work(&sdata->local->hw,
+ &ifmgd->csa_connection_drop_work);
+ goto out;
+ }
+
+ goto out;
+ }
+
+ if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef,
+ &sdata->csa_chandef)) {
sdata_info(sdata,
- "vif channel switch failed, disconnecting\n");
+ "failed to finalize channel switch, disconnecting\n");
ieee80211_queue_work(&sdata->local->hw,
&ifmgd->csa_connection_drop_work);
goto out;
}
- if (!local->use_chanctx) {
- local->_oper_chandef = sdata->csa_chandef;
- /* Call "hw_config" only if doing sw channel switch.
- * Otherwise update the channel directly
- */
- if (!local->ops->channel_switch)
- ieee80211_hw_config(local, 0);
- else
- local->hw.conf.chandef = local->_oper_chandef;
- }
-
/* XXX: shouldn't really modify cfg80211-owned data! */
ifmgd->associated->channel = sdata->csa_chandef.chan;
- ieee80211_bss_info_change_notify(sdata, changed);
-
- mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
- /* XXX: wait for a beacon first? */
- if (!ieee80211_csa_needs_block_tx(local))
- ieee80211_wake_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- mutex_unlock(&local->mtx);
- ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+ /* XXX: wait for a beacon first? */
+ if (sdata->csa_block_tx) {
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_block_tx = false;
+ }
ieee80211_sta_reset_beacon_monitor(sdata);
ieee80211_sta_reset_conn_monitor(sdata);
out:
+ mutex_unlock(&local->chanctx_mtx);
+ mutex_unlock(&local->mtx);
sdata_unlock(sdata);
}
@@ -1028,6 +1052,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct cfg80211_bss *cbss = ifmgd->associated;
+ struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
enum ieee80211_band current_band;
struct ieee80211_csa_ie csa_ie;
@@ -1042,7 +1067,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return;
/* disregard subsequent announcements if we are already processing */
- if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)
+ if (sdata->vif.csa_active)
return;
current_band = cbss->channel->band;
@@ -1069,9 +1094,22 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return;
}
- ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
-
+ mutex_lock(&local->mtx);
mutex_lock(&local->chanctx_mtx);
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (!conf) {
+ sdata_info(sdata,
+ "no channel context assigned to vif?, disconnecting\n");
+ ieee80211_queue_work(&local->hw,
+ &ifmgd->csa_connection_drop_work);
+ mutex_unlock(&local->chanctx_mtx);
+ mutex_unlock(&local->mtx);
+ return;
+ }
+
+ chanctx = container_of(conf, struct ieee80211_chanctx, conf);
+
if (local->use_chanctx) {
u32 num_chanctx = 0;
list_for_each_entry(chanctx, &local->chanctx_list, list)
@@ -1084,38 +1122,32 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
ieee80211_queue_work(&local->hw,
&ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
+ mutex_unlock(&local->mtx);
return;
}
}
- if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) {
- ieee80211_queue_work(&local->hw,
- &ifmgd->csa_connection_drop_work);
- mutex_unlock(&local->chanctx_mtx);
- return;
- }
- chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf),
- struct ieee80211_chanctx, conf);
- if (ieee80211_chanctx_refcount(local, chanctx) > 1) {
+ res = ieee80211_vif_reserve_chanctx(sdata, &csa_ie.chandef,
+ chanctx->mode, false);
+ if (res) {
sdata_info(sdata,
- "channel switch with multiple interfaces on the same channel, disconnecting\n");
+ "failed to reserve channel context for channel switch, disconnecting (err=%d)\n",
+ res);
ieee80211_queue_work(&local->hw,
&ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
+ mutex_unlock(&local->mtx);
return;
}
mutex_unlock(&local->chanctx_mtx);
- sdata->csa_chandef = csa_ie.chandef;
-
- mutex_lock(&local->mtx);
sdata->vif.csa_active = true;
+ sdata->csa_chandef = csa_ie.chandef;
sdata->csa_block_tx = csa_ie.mode;
if (sdata->csa_block_tx)
- ieee80211_stop_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
mutex_unlock(&local->mtx);
if (local->ops->channel_switch) {
@@ -1139,19 +1171,21 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
TU_TO_EXP_TIME(csa_ie.count * cbss->beacon_interval));
}
-static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel *channel,
- const u8 *country_ie, u8 country_ie_len,
- const u8 *pwr_constr_elem)
+static bool
+ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
+ const u8 *country_ie, u8 country_ie_len,
+ const u8 *pwr_constr_elem,
+ int *chan_pwr, int *pwr_reduction)
{
struct ieee80211_country_ie_triplet *triplet;
int chan = ieee80211_frequency_to_channel(channel->center_freq);
- int i, chan_pwr, chan_increment, new_ap_level;
+ int i, chan_increment;
bool have_chan_pwr = false;
/* Invalid IE */
if (country_ie_len % 2 || country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
- return 0;
+ return false;
triplet = (void *)(country_ie + 3);
country_ie_len -= 3;
@@ -1179,7 +1213,7 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
for (i = 0; i < triplet->chans.num_channels; i++) {
if (first_channel + i * chan_increment == chan) {
have_chan_pwr = true;
- chan_pwr = triplet->chans.max_power;
+ *chan_pwr = triplet->chans.max_power;
break;
}
}
@@ -1191,18 +1225,76 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
country_ie_len -= 3;
}
- if (!have_chan_pwr)
+ if (have_chan_pwr)
+ *pwr_reduction = *pwr_constr_elem;
+ return have_chan_pwr;
+}
+
+static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
+ const u8 *cisco_dtpc_ie,
+ int *pwr_level)
+{
+ /* From practical testing, the first data byte of the DTPC element
+ * seems to contain the requested dBm level, and the CLI on Cisco
+ * APs clearly state the range is -127 to 127 dBm, which indicates
+ * a signed byte, although it seemingly never actually goes negative.
+ * The other byte seems to always be zero.
+ */
+ *pwr_level = (__s8)cisco_dtpc_ie[4];
+}
+
+static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
+ struct ieee80211_mgmt *mgmt,
+ const u8 *country_ie, u8 country_ie_len,
+ const u8 *pwr_constr_ie,
+ const u8 *cisco_dtpc_ie)
+{
+ bool has_80211h_pwr = false, has_cisco_pwr = false;
+ int chan_pwr = 0, pwr_reduction_80211h = 0;
+ int pwr_level_cisco, pwr_level_80211h;
+ int new_ap_level;
+
+ if (country_ie && pwr_constr_ie &&
+ mgmt->u.probe_resp.capab_info &
+ cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT)) {
+ has_80211h_pwr = ieee80211_find_80211h_pwr_constr(
+ sdata, channel, country_ie, country_ie_len,
+ pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h);
+ pwr_level_80211h =
+ max_t(int, 0, chan_pwr - pwr_reduction_80211h);
+ }
+
+ if (cisco_dtpc_ie) {
+ ieee80211_find_cisco_dtpc(
+ sdata, channel, cisco_dtpc_ie, &pwr_level_cisco);
+ has_cisco_pwr = true;
+ }
+
+ if (!has_80211h_pwr && !has_cisco_pwr)
return 0;
- new_ap_level = max_t(int, 0, chan_pwr - *pwr_constr_elem);
+ /* If we have both 802.11h and Cisco DTPC, apply both limits
+ * by picking the smallest of the two power levels advertised.
+ */
+ if (has_80211h_pwr &&
+ (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) {
+ sdata_info(sdata,
+ "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
+ pwr_level_80211h, chan_pwr, pwr_reduction_80211h,
+ sdata->u.mgd.bssid);
+ new_ap_level = pwr_level_80211h;
+ } else { /* has_cisco_pwr is always true here. */
+ sdata_info(sdata,
+ "Limiting TX power to %d dBm as advertised by %pM\n",
+ pwr_level_cisco, sdata->u.mgd.bssid);
+ new_ap_level = pwr_level_cisco;
+ }
if (sdata->ap_power_level == new_ap_level)
return 0;
- sdata_info(sdata,
- "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
- new_ap_level, chan_pwr, *pwr_constr_elem,
- sdata->u.mgd.bssid);
sdata->ap_power_level = new_ap_level;
if (__ieee80211_recalc_txpower(sdata))
return BSS_CHANGED_TXPOWER;
@@ -1385,7 +1477,8 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_PS);
+ IEEE80211_QUEUE_STOP_REASON_PS,
+ false);
}
void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
@@ -1830,10 +1923,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_vif_release_channel(sdata);
sdata->vif.csa_active = false;
- if (!ieee80211_csa_needs_block_tx(local))
- ieee80211_wake_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ if (sdata->csa_block_tx) {
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_block_tx = false;
+ }
mutex_unlock(&local->mtx);
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
@@ -2075,14 +2169,13 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
true, frame_buf);
- ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
-
mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
- if (!ieee80211_csa_needs_block_tx(local))
- ieee80211_wake_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ if (sdata->csa_block_tx) {
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ sdata->csa_block_tx = false;
+ }
mutex_unlock(&local->mtx);
cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
@@ -2658,8 +2751,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED)
set_sta_flag(sta, WLAN_STA_MFP);
- if (elems.wmm_param)
- set_sta_flag(sta, WLAN_STA_WME);
+ sta->sta.wme = elems.wmm_param;
err = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT))
@@ -2725,6 +2817,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
u16 capab_info, status_code, aid;
struct ieee802_11_elems elems;
+ int ac, uapsd_queues = -1;
u8 *pos;
bool reassoc;
struct cfg80211_bss *bss;
@@ -2794,9 +2887,15 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
* is set can cause the interface to go idle
*/
ieee80211_destroy_assoc_data(sdata, true);
+
+ /* get uapsd queues configuration */
+ uapsd_queues = 0;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ if (sdata->tx_conf[ac].uapsd)
+ uapsd_queues |= BIT(ac);
}
- cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len);
+ cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues);
}
static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
@@ -2866,7 +2965,9 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
/*
* This is the canonical list of information elements we care about,
* the filter code also gives us all changes to the Microsoft OUI
- * (00:50:F2) vendor IE which is used for WMM which we need to track.
+ * (00:50:F2) vendor IE which is used for WMM which we need to track,
+ * as well as the DTPC IE (part of the Cisco OUI) used for signaling
+ * changes to requested client power.
*
* We implement beacon filtering in software since that means we can
* avoid processing the frame here and in cfg80211, and userspace
@@ -3155,7 +3256,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, bssid);
- if (ieee80211_config_bw(sdata, sta, elems.ht_operation,
+ if (ieee80211_config_bw(sdata, sta,
+ elems.ht_cap_elem, elems.ht_operation,
elems.vht_operation, bssid, &changed)) {
mutex_unlock(&local->sta_mtx);
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
@@ -3171,13 +3273,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
rx_status->band, true);
mutex_unlock(&local->sta_mtx);
- if (elems.country_elem && elems.pwr_constr_elem &&
- mgmt->u.probe_resp.capab_info &
- cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT))
- changed |= ieee80211_handle_pwr_constr(sdata, chan,
- elems.country_elem,
- elems.country_elem_len,
- elems.pwr_constr_elem);
+ changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt,
+ elems.country_elem,
+ elems.country_elem_len,
+ elems.pwr_constr_elem,
+ elems.cisco_dtpc_elem);
ieee80211_bss_info_change_notify(sdata, changed);
}
@@ -3688,6 +3788,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
INIT_WORK(&ifmgd->csa_connection_drop_work,
ieee80211_csa_connection_drop_work);
INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work);
+ INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work,
+ ieee80211_tdls_peer_del_work);
setup_timer(&ifmgd->timer, ieee80211_sta_timer,
(unsigned long) sdata);
setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -3703,7 +3805,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len;
ifmgd->p2p_noa_index = -1;
- if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS)
+ if (sdata->local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS)
ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC;
else
ifmgd->req_smps = IEEE80211_SMPS_OFF;
@@ -3787,6 +3889,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ const struct ieee80211_ht_cap *ht_cap = NULL;
const struct ieee80211_ht_operation *ht_oper = NULL;
const struct ieee80211_vht_operation *vht_oper = NULL;
struct ieee80211_supported_band *sband;
@@ -3803,14 +3906,17 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
sband->ht_cap.ht_supported) {
- const u8 *ht_oper_ie, *ht_cap;
+ const u8 *ht_oper_ie, *ht_cap_ie;
ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION);
if (ht_oper_ie && ht_oper_ie[1] >= sizeof(*ht_oper))
ht_oper = (void *)(ht_oper_ie + 2);
- ht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY);
- if (!ht_cap || ht_cap[1] < sizeof(struct ieee80211_ht_cap)) {
+ ht_cap_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_CAPABILITY);
+ if (ht_cap_ie && ht_cap_ie[1] >= sizeof(*ht_cap))
+ ht_cap = (void *)(ht_cap_ie + 2);
+
+ if (!ht_cap) {
ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
ht_oper = NULL;
}
@@ -3841,7 +3947,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
cbss->channel,
- ht_oper, vht_oper,
+ ht_cap, ht_oper, vht_oper,
&chandef, false);
sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
@@ -4355,8 +4461,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (bss->wmm_used && bss->uapsd_supported &&
- (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) &&
- sdata->wmm_acm != 0xff) {
+ (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) {
assoc_data->uapsd = true;
ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
} else {
@@ -4375,6 +4480,11 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
ifmgd->flags &= ~IEEE80211_STA_MFP_ENABLED;
}
+ if (req->flags & ASSOC_REQ_USE_RRM)
+ ifmgd->flags |= IEEE80211_STA_ENABLE_RRM;
+ else
+ ifmgd->flags &= ~IEEE80211_STA_ENABLE_RRM;
+
if (req->crypto.control_port)
ifmgd->flags |= IEEE80211_STA_CONTROL_PORT;
else
@@ -4551,6 +4661,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
cancel_work_sync(&ifmgd->request_smps_work);
cancel_work_sync(&ifmgd->csa_connection_drop_work);
cancel_work_sync(&ifmgd->chswitch_work);
+ cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work);
sdata_lock(sdata);
if (ifmgd->assoc_data) {
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 7a17decd27f9..ff20b2ebdb30 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -119,7 +119,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
* before sending nullfunc to enable powersave at the AP.
*/
ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
+ IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
+ false);
ieee80211_flush_queues(local, NULL);
mutex_lock(&local->iflist_mtx);
@@ -182,7 +183,8 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
mutex_unlock(&local->iflist_mtx);
ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
+ IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
+ false);
}
void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc)
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d478b880a0af..4c5192e0d66c 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -35,7 +35,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
ieee80211_stop_queues_by_reason(hw,
IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND,
+ false);
/* flush out all packets */
synchronize_net();
@@ -74,7 +75,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
}
ieee80211_wake_queues_by_reason(hw,
IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND,
+ false);
return err;
} else if (err > 0) {
WARN_ON(err != 1);
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 9aa2a1190a86..18babe302832 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -143,19 +143,6 @@ void rate_control_deinitialize(struct ieee80211_local *local);
/* Rate control algorithms */
-#ifdef CONFIG_MAC80211_RC_PID
-int rc80211_pid_init(void);
-void rc80211_pid_exit(void);
-#else
-static inline int rc80211_pid_init(void)
-{
- return 0;
-}
-static inline void rc80211_pid_exit(void)
-{
-}
-#endif
-
#ifdef CONFIG_MAC80211_RC_MINSTREL
int rc80211_minstrel_init(void);
void rc80211_minstrel_exit(void);
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 1c1469c36dca..2baa7ed8789d 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -75,7 +75,7 @@ minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
{
int j = MAX_THR_RATES;
- while (j > 0 && mi->r[i].cur_tp > mi->r[tp_list[j - 1]].cur_tp)
+ while (j > 0 && mi->r[i].stats.cur_tp > mi->r[tp_list[j - 1]].stats.cur_tp)
j--;
if (j < MAX_THR_RATES - 1)
memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1));
@@ -92,7 +92,7 @@ minstrel_set_rate(struct minstrel_sta_info *mi, struct ieee80211_sta_rates *rate
ratetbl->rate[offset].idx = r->rix;
ratetbl->rate[offset].count = r->adjusted_retry_count;
ratetbl->rate[offset].count_cts = r->retry_count_cts;
- ratetbl->rate[offset].count_rts = r->retry_count_rtscts;
+ ratetbl->rate[offset].count_rts = r->stats.retry_count_rtscts;
}
static void
@@ -140,44 +140,46 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
+ struct minstrel_rate_stats *mrs = &mi->r[i].stats;
usecs = mr->perfect_tx_time;
if (!usecs)
usecs = 1000000;
- if (unlikely(mr->attempts > 0)) {
- mr->sample_skipped = 0;
- mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts);
- mr->succ_hist += mr->success;
- mr->att_hist += mr->attempts;
- mr->probability = minstrel_ewma(mr->probability,
- mr->cur_prob,
- EWMA_LEVEL);
+ if (unlikely(mrs->attempts > 0)) {
+ mrs->sample_skipped = 0;
+ mrs->cur_prob = MINSTREL_FRAC(mrs->success,
+ mrs->attempts);
+ mrs->succ_hist += mrs->success;
+ mrs->att_hist += mrs->attempts;
+ mrs->probability = minstrel_ewma(mrs->probability,
+ mrs->cur_prob,
+ EWMA_LEVEL);
} else
- mr->sample_skipped++;
+ mrs->sample_skipped++;
- mr->last_success = mr->success;
- mr->last_attempts = mr->attempts;
- mr->success = 0;
- mr->attempts = 0;
+ mrs->last_success = mrs->success;
+ mrs->last_attempts = mrs->attempts;
+ mrs->success = 0;
+ mrs->attempts = 0;
/* Update throughput per rate, reset thr. below 10% success */
- if (mr->probability < MINSTREL_FRAC(10, 100))
- mr->cur_tp = 0;
+ if (mrs->probability < MINSTREL_FRAC(10, 100))
+ mrs->cur_tp = 0;
else
- mr->cur_tp = mr->probability * (1000000 / usecs);
+ mrs->cur_tp = mrs->probability * (1000000 / usecs);
/* Sample less often below the 10% chance of success.
* Sample less often above the 95% chance of success. */
- if (mr->probability > MINSTREL_FRAC(95, 100) ||
- mr->probability < MINSTREL_FRAC(10, 100)) {
- mr->adjusted_retry_count = mr->retry_count >> 1;
+ if (mrs->probability > MINSTREL_FRAC(95, 100) ||
+ mrs->probability < MINSTREL_FRAC(10, 100)) {
+ mr->adjusted_retry_count = mrs->retry_count >> 1;
if (mr->adjusted_retry_count > 2)
mr->adjusted_retry_count = 2;
mr->sample_limit = 4;
} else {
mr->sample_limit = -1;
- mr->adjusted_retry_count = mr->retry_count;
+ mr->adjusted_retry_count = mrs->retry_count;
}
if (!mr->adjusted_retry_count)
mr->adjusted_retry_count = 2;
@@ -190,11 +192,11 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
* choose the maximum throughput rate as max_prob_rate
* (2) if all success probabilities < 95%, the rate with
* highest success probability is choosen as max_prob_rate */
- if (mr->probability >= MINSTREL_FRAC(95, 100)) {
- if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp)
+ if (mrs->probability >= MINSTREL_FRAC(95, 100)) {
+ if (mrs->cur_tp >= mi->r[tmp_prob_rate].stats.cur_tp)
tmp_prob_rate = i;
} else {
- if (mr->probability >= mi->r[tmp_prob_rate].probability)
+ if (mrs->probability >= mi->r[tmp_prob_rate].stats.probability)
tmp_prob_rate = i;
}
}
@@ -240,14 +242,14 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
if (ndx < 0)
continue;
- mi->r[ndx].attempts += ar[i].count;
+ mi->r[ndx].stats.attempts += ar[i].count;
if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0))
- mi->r[ndx].success += success;
+ mi->r[ndx].stats.success += success;
}
if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0))
- mi->sample_count++;
+ mi->sample_packets++;
if (mi->sample_deferred > 0)
mi->sample_deferred--;
@@ -265,7 +267,7 @@ minstrel_get_retry_count(struct minstrel_rate *mr,
unsigned int retry = mr->adjusted_retry_count;
if (info->control.use_rts)
- retry = max(2U, min(mr->retry_count_rtscts, retry));
+ retry = max(2U, min(mr->stats.retry_count_rtscts, retry));
else if (info->control.use_cts_prot)
retry = max(2U, min(mr->retry_count_cts, retry));
return retry;
@@ -317,15 +319,15 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
sampling_ratio = mp->lookaround_rate;
/* increase sum packet counter */
- mi->packet_count++;
+ mi->total_packets++;
#ifdef CONFIG_MAC80211_DEBUGFS
if (mp->fixed_rate_idx != -1)
return;
#endif
- delta = (mi->packet_count * sampling_ratio / 100) -
- (mi->sample_count + mi->sample_deferred / 2);
+ delta = (mi->total_packets * sampling_ratio / 100) -
+ (mi->sample_packets + mi->sample_deferred / 2);
/* delta < 0: no sampling required */
prev_sample = mi->prev_sample;
@@ -333,10 +335,10 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
if (delta < 0 || (!mrr_capable && prev_sample))
return;
- if (mi->packet_count >= 10000) {
+ if (mi->total_packets >= 10000) {
mi->sample_deferred = 0;
- mi->sample_count = 0;
- mi->packet_count = 0;
+ mi->sample_packets = 0;
+ mi->total_packets = 0;
} else if (delta > mi->n_rates * 2) {
/* With multi-rate retry, not every planned sample
* attempt actually gets used, due to the way the retry
@@ -347,7 +349,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
* starts getting worse, minstrel would start bursting
* out lots of sampling frames, which would result
* in a large throughput loss. */
- mi->sample_count += (delta - mi->n_rates * 2);
+ mi->sample_packets += (delta - mi->n_rates * 2);
}
/* get next random rate sample */
@@ -361,7 +363,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
*/
if (mrr_capable &&
msr->perfect_tx_time > mr->perfect_tx_time &&
- msr->sample_skipped < 20) {
+ msr->stats.sample_skipped < 20) {
/* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
* packets that have the sampling rate deferred to the
* second MRR stage. Increase the sample counter only
@@ -375,7 +377,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
if (!msr->sample_limit != 0)
return;
- mi->sample_count++;
+ mi->sample_packets++;
if (msr->sample_limit > 0)
msr->sample_limit--;
}
@@ -384,7 +386,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
* has a probability of >95%, we shouldn't be attempting
* to use it, as this only wastes precious airtime */
if (!mrr_capable &&
- (mi->r[ndx].probability > MINSTREL_FRAC(95, 100)))
+ (mi->r[ndx].stats.probability > MINSTREL_FRAC(95, 100)))
return;
mi->prev_sample = true;
@@ -459,6 +461,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
for (i = 0; i < sband->n_bitrates; i++) {
struct minstrel_rate *mr = &mi->r[n];
+ struct minstrel_rate_stats *mrs = &mi->r[n].stats;
unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
unsigned int tx_time_single;
unsigned int cw = mp->cw_min;
@@ -471,6 +474,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
n++;
memset(mr, 0, sizeof(*mr));
+ memset(mrs, 0, sizeof(*mrs));
mr->rix = i;
shift = ieee80211_chandef_get_shift(chandef);
@@ -482,9 +486,9 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
/* calculate maximum number of retransmissions before
* fallback (based on maximum segment size) */
mr->sample_limit = -1;
- mr->retry_count = 1;
+ mrs->retry_count = 1;
mr->retry_count_cts = 1;
- mr->retry_count_rtscts = 1;
+ mrs->retry_count_rtscts = 1;
tx_time = mr->perfect_tx_time + mi->sp_ack_dur;
do {
/* add one retransmission */
@@ -501,13 +505,13 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
(mr->retry_count_cts < mp->max_retry))
mr->retry_count_cts++;
if ((tx_time_rtscts < mp->segment_size) &&
- (mr->retry_count_rtscts < mp->max_retry))
- mr->retry_count_rtscts++;
+ (mrs->retry_count_rtscts < mp->max_retry))
+ mrs->retry_count_rtscts++;
} while ((tx_time < mp->segment_size) &&
- (++mr->retry_count < mp->max_retry));
- mr->adjusted_retry_count = mr->retry_count;
+ (++mr->stats.retry_count < mp->max_retry));
+ mr->adjusted_retry_count = mrs->retry_count;
if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G))
- mr->retry_count_cts = mr->retry_count;
+ mr->retry_count_cts = mrs->retry_count;
}
for (i = n; i < sband->n_bitrates; i++) {
@@ -665,7 +669,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
/* convert pkt per sec in kbps (1200 is the average pkt size used for
* computing cur_tp
*/
- return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024;
+ return MINSTREL_TRUNC(mi->r[idx].stats.cur_tp) * 1200 * 8 / 1024;
}
const struct rate_control_ops mac80211_minstrel = {
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 046d1bd598a8..97eca86a4af0 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -31,6 +31,27 @@ minstrel_ewma(int old, int new, int weight)
return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
}
+struct minstrel_rate_stats {
+ /* current / last sampling period attempts/success counters */
+ unsigned int attempts, last_attempts;
+ unsigned int success, last_success;
+
+ /* total attempts/success counters */
+ u64 att_hist, succ_hist;
+
+ /* current throughput */
+ unsigned int cur_tp;
+
+ /* packet delivery probabilities */
+ unsigned int cur_prob, probability;
+
+ /* maximum retry counts */
+ unsigned int retry_count;
+ unsigned int retry_count_rtscts;
+
+ u8 sample_skipped;
+ bool retry_updated;
+};
struct minstrel_rate {
int bitrate;
@@ -40,26 +61,10 @@ struct minstrel_rate {
unsigned int ack_time;
int sample_limit;
- unsigned int retry_count;
unsigned int retry_count_cts;
- unsigned int retry_count_rtscts;
unsigned int adjusted_retry_count;
- u32 success;
- u32 attempts;
- u32 last_attempts;
- u32 last_success;
- u8 sample_skipped;
-
- /* parts per thousand */
- u32 cur_prob;
- u32 probability;
-
- /* per-rate throughput */
- u32 cur_tp;
-
- u64 succ_hist;
- u64 att_hist;
+ struct minstrel_rate_stats stats;
};
struct minstrel_sta_info {
@@ -73,8 +78,8 @@ struct minstrel_sta_info {
u8 max_tp_rate[MAX_THR_RATES];
u8 max_prob_rate;
- unsigned int packet_count;
- unsigned int sample_count;
+ unsigned int total_packets;
+ unsigned int sample_packets;
int sample_deferred;
unsigned int sample_row;
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index fd0b9ca1570e..edde723f9f00 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -72,6 +72,7 @@ minstrel_stats_open(struct inode *inode, struct file *file)
"this succ/attempt success attempts\n");
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
+ struct minstrel_rate_stats *mrs = &mi->r[i].stats;
*(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' ';
*(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' ';
@@ -81,24 +82,24 @@ minstrel_stats_open(struct inode *inode, struct file *file)
p += sprintf(p, "%3u%s", mr->bitrate / 2,
(mr->bitrate & 1 ? ".5" : " "));
- tp = MINSTREL_TRUNC(mr->cur_tp / 10);
- prob = MINSTREL_TRUNC(mr->cur_prob * 1000);
- eprob = MINSTREL_TRUNC(mr->probability * 1000);
+ tp = MINSTREL_TRUNC(mrs->cur_tp / 10);
+ prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
+ eprob = MINSTREL_TRUNC(mrs->probability * 1000);
p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u "
" %3u(%3u) %8llu %8llu\n",
tp / 10, tp % 10,
eprob / 10, eprob % 10,
prob / 10, prob % 10,
- mr->last_success,
- mr->last_attempts,
- (unsigned long long)mr->succ_hist,
- (unsigned long long)mr->att_hist);
+ mrs->last_success,
+ mrs->last_attempts,
+ (unsigned long long)mrs->succ_hist,
+ (unsigned long long)mrs->att_hist);
}
p += sprintf(p, "\nTotal packet count:: ideal %d "
"lookaround %d\n\n",
- mi->packet_count - mi->sample_count,
- mi->sample_count);
+ mi->total_packets - mi->sample_packets,
+ mi->sample_packets);
ms->len = p - ms->buf;
return 0;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 85c1e74b7714..df90ce2db00c 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -135,7 +135,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi);
static int
minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate)
{
- return GROUP_IDX((rate->idx / 8) + 1,
+ return GROUP_IDX((rate->idx / MCS_GROUP_RATES) + 1,
!!(rate->flags & IEEE80211_TX_RC_SHORT_GI),
!!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH));
}
@@ -233,12 +233,151 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
}
/*
+ * Find & sort topmost throughput rates
+ *
+ * If multiple rates provide equal throughput the sorting is based on their
+ * current success probability. Higher success probability is preferred among
+ * MCS groups, CCK rates do not provide aggregation and are therefore at last.
+ */
+static void
+minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u8 index,
+ u8 *tp_list)
+{
+ int cur_group, cur_idx, cur_thr, cur_prob;
+ int tmp_group, tmp_idx, tmp_thr, tmp_prob;
+ int j = MAX_THR_RATES;
+
+ cur_group = index / MCS_GROUP_RATES;
+ cur_idx = index % MCS_GROUP_RATES;
+ cur_thr = mi->groups[cur_group].rates[cur_idx].cur_tp;
+ cur_prob = mi->groups[cur_group].rates[cur_idx].probability;
+
+ tmp_group = tp_list[j - 1] / MCS_GROUP_RATES;
+ tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES;
+ tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability;
+
+ while (j > 0 && (cur_thr > tmp_thr ||
+ (cur_thr == tmp_thr && cur_prob > tmp_prob))) {
+ j--;
+ tmp_group = tp_list[j - 1] / MCS_GROUP_RATES;
+ tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES;
+ tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability;
+ }
+
+ if (j < MAX_THR_RATES - 1) {
+ memmove(&tp_list[j + 1], &tp_list[j], (sizeof(*tp_list) *
+ (MAX_THR_RATES - (j + 1))));
+ }
+ if (j < MAX_THR_RATES)
+ tp_list[j] = index;
+}
+
+/*
+ * Find and set the topmost probability rate per sta and per group
+ */
+static void
+minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u8 index)
+{
+ struct minstrel_mcs_group_data *mg;
+ struct minstrel_rate_stats *mr;
+ int tmp_group, tmp_idx, tmp_tp, tmp_prob, max_tp_group;
+
+ mg = &mi->groups[index / MCS_GROUP_RATES];
+ mr = &mg->rates[index % MCS_GROUP_RATES];
+
+ tmp_group = mi->max_prob_rate / MCS_GROUP_RATES;
+ tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES;
+ tmp_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability;
+
+ /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from
+ * MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */
+ max_tp_group = mi->max_tp_rate[0] / MCS_GROUP_RATES;
+ if((index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) &&
+ (max_tp_group != MINSTREL_CCK_GROUP))
+ return;
+
+ if (mr->probability > MINSTREL_FRAC(75, 100)) {
+ if (mr->cur_tp > tmp_tp)
+ mi->max_prob_rate = index;
+ if (mr->cur_tp > mg->rates[mg->max_group_prob_rate].cur_tp)
+ mg->max_group_prob_rate = index;
+ } else {
+ if (mr->probability > tmp_prob)
+ mi->max_prob_rate = index;
+ if (mr->probability > mg->rates[mg->max_group_prob_rate].probability)
+ mg->max_group_prob_rate = index;
+ }
+}
+
+
+/*
+ * Assign new rate set per sta and use CCK rates only if the fastest
+ * rate (max_tp_rate[0]) is from CCK group. This prohibits such sorted
+ * rate sets where MCS and CCK rates are mixed, because CCK rates can
+ * not use aggregation.
+ */
+static void
+minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
+ u8 tmp_mcs_tp_rate[MAX_THR_RATES],
+ u8 tmp_cck_tp_rate[MAX_THR_RATES])
+{
+ unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp;
+ int i;
+
+ tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES;
+ tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES;
+ tmp_cck_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
+
+ tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES;
+ tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES;
+ tmp_mcs_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp;
+
+ if (tmp_cck_tp > tmp_mcs_tp) {
+ for(i = 0; i < MAX_THR_RATES; i++) {
+ minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i],
+ tmp_mcs_tp_rate);
+ }
+ }
+
+}
+
+/*
+ * Try to increase robustness of max_prob rate by decrease number of
+ * streams if possible.
+ */
+static inline void
+minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
+{
+ struct minstrel_mcs_group_data *mg;
+ struct minstrel_rate_stats *mr;
+ int tmp_max_streams, group;
+ int tmp_tp = 0;
+
+ tmp_max_streams = minstrel_mcs_groups[mi->max_tp_rate[0] /
+ MCS_GROUP_RATES].streams;
+ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
+ mg = &mi->groups[group];
+ if (!mg->supported || group == MINSTREL_CCK_GROUP)
+ continue;
+ mr = minstrel_get_ratestats(mi, mg->max_group_prob_rate);
+ if (tmp_tp < mr->cur_tp &&
+ (minstrel_mcs_groups[group].streams < tmp_max_streams)) {
+ mi->max_prob_rate = mg->max_group_prob_rate;
+ tmp_tp = mr->cur_tp;
+ }
+ }
+}
+
+/*
* Update rate statistics and select new primary rates
*
* Rules for rate selection:
* - max_prob_rate must use only one stream, as a tradeoff between delivery
* probability and throughput during strong fluctuations
- * - as long as the max prob rate has a probability of more than 3/4, pick
+ * - as long as the max prob rate has a probability of more than 75%, pick
* higher throughput rates, even if the probablity is a bit lower
*/
static void
@@ -246,9 +385,9 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct minstrel_mcs_group_data *mg;
struct minstrel_rate_stats *mr;
- int cur_prob, cur_prob_tp, cur_tp, cur_tp2;
- int group, i, index;
- bool mi_rates_valid = false;
+ int group, i, j;
+ u8 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES];
+ u8 tmp_cck_tp_rate[MAX_THR_RATES], index;
if (mi->ampdu_packets > 0) {
mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
@@ -260,13 +399,14 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
mi->sample_slow = 0;
mi->sample_count = 0;
- for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
- bool mg_rates_valid = false;
+ /* Initialize global rate indexes */
+ for(j = 0; j < MAX_THR_RATES; j++){
+ tmp_mcs_tp_rate[j] = 0;
+ tmp_cck_tp_rate[j] = 0;
+ }
- cur_prob = 0;
- cur_prob_tp = 0;
- cur_tp = 0;
- cur_tp2 = 0;
+ /* Find best rate sets within all MCS groups*/
+ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
mg = &mi->groups[group];
if (!mg->supported)
@@ -274,24 +414,16 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
mi->sample_count++;
+ /* (re)Initialize group rate indexes */
+ for(j = 0; j < MAX_THR_RATES; j++)
+ tmp_group_tp_rate[j] = group;
+
for (i = 0; i < MCS_GROUP_RATES; i++) {
if (!(mg->supported & BIT(i)))
continue;
index = MCS_GROUP_RATES * group + i;
- /* initialize rates selections starting indexes */
- if (!mg_rates_valid) {
- mg->max_tp_rate = mg->max_tp_rate2 =
- mg->max_prob_rate = i;
- if (!mi_rates_valid) {
- mi->max_tp_rate = mi->max_tp_rate2 =
- mi->max_prob_rate = index;
- mi_rates_valid = true;
- }
- mg_rates_valid = true;
- }
-
mr = &mg->rates[i];
mr->retry_updated = false;
minstrel_calc_rate_ewma(mr);
@@ -300,82 +432,47 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
if (!mr->cur_tp)
continue;
- if ((mr->cur_tp > cur_prob_tp && mr->probability >
- MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) {
- mg->max_prob_rate = index;
- cur_prob = mr->probability;
- cur_prob_tp = mr->cur_tp;
- }
-
- if (mr->cur_tp > cur_tp) {
- swap(index, mg->max_tp_rate);
- cur_tp = mr->cur_tp;
- mr = minstrel_get_ratestats(mi, index);
- }
-
- if (index >= mg->max_tp_rate)
- continue;
-
- if (mr->cur_tp > cur_tp2) {
- mg->max_tp_rate2 = index;
- cur_tp2 = mr->cur_tp;
+ /* Find max throughput rate set */
+ if (group != MINSTREL_CCK_GROUP) {
+ minstrel_ht_sort_best_tp_rates(mi, index,
+ tmp_mcs_tp_rate);
+ } else if (group == MINSTREL_CCK_GROUP) {
+ minstrel_ht_sort_best_tp_rates(mi, index,
+ tmp_cck_tp_rate);
}
- }
- }
- /* try to sample all available rates during each interval */
- mi->sample_count *= 8;
+ /* Find max throughput rate set within a group */
+ minstrel_ht_sort_best_tp_rates(mi, index,
+ tmp_group_tp_rate);
- cur_prob = 0;
- cur_prob_tp = 0;
- cur_tp = 0;
- cur_tp2 = 0;
- for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
- mg = &mi->groups[group];
- if (!mg->supported)
- continue;
-
- mr = minstrel_get_ratestats(mi, mg->max_tp_rate);
- if (cur_tp < mr->cur_tp) {
- mi->max_tp_rate2 = mi->max_tp_rate;
- cur_tp2 = cur_tp;
- mi->max_tp_rate = mg->max_tp_rate;
- cur_tp = mr->cur_tp;
- mi->max_prob_streams = minstrel_mcs_groups[group].streams - 1;
+ /* Find max probability rate per group and global */
+ minstrel_ht_set_best_prob_rate(mi, index);
}
- mr = minstrel_get_ratestats(mi, mg->max_tp_rate2);
- if (cur_tp2 < mr->cur_tp) {
- mi->max_tp_rate2 = mg->max_tp_rate2;
- cur_tp2 = mr->cur_tp;
- }
+ memcpy(mg->max_group_tp_rate, tmp_group_tp_rate,
+ sizeof(mg->max_group_tp_rate));
}
- if (mi->max_prob_streams < 1)
- mi->max_prob_streams = 1;
+ /* Assign new rate set per sta */
+ minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate, tmp_cck_tp_rate);
+ memcpy(mi->max_tp_rate, tmp_mcs_tp_rate, sizeof(mi->max_tp_rate));
- for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
- mg = &mi->groups[group];
- if (!mg->supported)
- continue;
- mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
- if (cur_prob_tp < mr->cur_tp &&
- minstrel_mcs_groups[group].streams <= mi->max_prob_streams) {
- mi->max_prob_rate = mg->max_prob_rate;
- cur_prob = mr->cur_prob;
- cur_prob_tp = mr->cur_tp;
- }
- }
+ /* Try to increase robustness of max_prob_rate*/
+ minstrel_ht_prob_rate_reduce_streams(mi);
+
+ /* try to sample all available rates during each interval */
+ mi->sample_count *= 8;
#ifdef CONFIG_MAC80211_DEBUGFS
/* use fixed index if set */
if (mp->fixed_rate_idx != -1) {
- mi->max_tp_rate = mp->fixed_rate_idx;
- mi->max_tp_rate2 = mp->fixed_rate_idx;
+ for (i = 0; i < 4; i++)
+ mi->max_tp_rate[i] = mp->fixed_rate_idx;
mi->max_prob_rate = mp->fixed_rate_idx;
}
#endif
+ /* Reset update timer */
mi->stats_update = jiffies;
}
@@ -420,8 +517,7 @@ minstrel_next_sample_idx(struct minstrel_ht_sta *mi)
}
static void
-minstrel_downgrade_rate(struct minstrel_ht_sta *mi, unsigned int *idx,
- bool primary)
+minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u8 *idx, bool primary)
{
int group, orig_group;
@@ -437,9 +533,9 @@ minstrel_downgrade_rate(struct minstrel_ht_sta *mi, unsigned int *idx,
continue;
if (primary)
- *idx = mi->groups[group].max_tp_rate;
+ *idx = mi->groups[group].max_group_tp_rate[0];
else
- *idx = mi->groups[group].max_tp_rate2;
+ *idx = mi->groups[group].max_group_tp_rate[1];
break;
}
}
@@ -524,19 +620,19 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
* check for sudden death of spatial multiplexing,
* downgrade to a lower number of streams if necessary.
*/
- rate = minstrel_get_ratestats(mi, mi->max_tp_rate);
+ rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
if (rate->attempts > 30 &&
MINSTREL_FRAC(rate->success, rate->attempts) <
MINSTREL_FRAC(20, 100)) {
- minstrel_downgrade_rate(mi, &mi->max_tp_rate, true);
+ minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
update = true;
}
- rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2);
+ rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
if (rate2->attempts > 30 &&
MINSTREL_FRAC(rate2->success, rate2->attempts) <
MINSTREL_FRAC(20, 100)) {
- minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false);
+ minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
update = true;
}
@@ -661,12 +757,12 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
if (!rates)
return;
- /* Start with max_tp_rate */
- minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate);
+ /* Start with max_tp_rate[0] */
+ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]);
if (mp->hw->max_rates >= 3) {
- /* At least 3 tx rates supported, use max_tp_rate2 next */
- minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate2);
+ /* At least 3 tx rates supported, use max_tp_rate[1] next */
+ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[1]);
}
if (mp->hw->max_rates >= 2) {
@@ -691,7 +787,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct minstrel_rate_stats *mr;
struct minstrel_mcs_group_data *mg;
- unsigned int sample_dur, sample_group;
+ unsigned int sample_dur, sample_group, cur_max_tp_streams;
int sample_idx = 0;
if (mi->sample_wait > 0) {
@@ -718,8 +814,8 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
* to the frame. Hence, don't use sampling for the currently
* used rates.
*/
- if (sample_idx == mi->max_tp_rate ||
- sample_idx == mi->max_tp_rate2 ||
+ if (sample_idx == mi->max_tp_rate[0] ||
+ sample_idx == mi->max_tp_rate[1] ||
sample_idx == mi->max_prob_rate)
return -1;
@@ -734,9 +830,12 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
* Make sure that lower rates get sampled only occasionally,
* if the link is working perfectly.
*/
+
+ cur_max_tp_streams = minstrel_mcs_groups[mi->max_tp_rate[0] /
+ MCS_GROUP_RATES].streams;
sample_dur = minstrel_get_duration(sample_idx);
- if (sample_dur >= minstrel_get_duration(mi->max_tp_rate2) &&
- (mi->max_prob_streams <
+ if (sample_dur >= minstrel_get_duration(mi->max_tp_rate[1]) &&
+ (cur_max_tp_streams - 1 <
minstrel_mcs_groups[sample_group].streams ||
sample_dur >= minstrel_get_duration(mi->max_prob_rate))) {
if (mr->sample_skipped < 20)
@@ -1041,8 +1140,8 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
if (!msp->is_ht)
return mac80211_minstrel.get_expected_throughput(priv_sta);
- i = mi->max_tp_rate / MCS_GROUP_RATES;
- j = mi->max_tp_rate % MCS_GROUP_RATES;
+ i = mi->max_tp_rate[0] / MCS_GROUP_RATES;
+ j = mi->max_tp_rate[0] % MCS_GROUP_RATES;
/* convert cur_tp from pkt per second in kbps */
return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024;
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index d655586773ac..01570e0e014b 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -26,28 +26,6 @@ struct mcs_group {
extern const struct mcs_group minstrel_mcs_groups[];
-struct minstrel_rate_stats {
- /* current / last sampling period attempts/success counters */
- unsigned int attempts, last_attempts;
- unsigned int success, last_success;
-
- /* total attempts/success counters */
- u64 att_hist, succ_hist;
-
- /* current throughput */
- unsigned int cur_tp;
-
- /* packet delivery probabilities */
- unsigned int cur_prob, probability;
-
- /* maximum retry counts */
- unsigned int retry_count;
- unsigned int retry_count_rtscts;
-
- bool retry_updated;
- u8 sample_skipped;
-};
-
struct minstrel_mcs_group_data {
u8 index;
u8 column;
@@ -55,10 +33,9 @@ struct minstrel_mcs_group_data {
/* bitfield of supported MCS rates of this group */
u8 supported;
- /* selected primary rates */
- unsigned int max_tp_rate;
- unsigned int max_tp_rate2;
- unsigned int max_prob_rate;
+ /* sorted rate set within a MCS group*/
+ u8 max_group_tp_rate[MAX_THR_RATES];
+ u8 max_group_prob_rate;
/* MCS rate statistics */
struct minstrel_rate_stats rates[MCS_GROUP_RATES];
@@ -74,15 +51,9 @@ struct minstrel_ht_sta {
/* ampdu length (EWMA) */
unsigned int avg_ampdu_len;
- /* best throughput rate */
- unsigned int max_tp_rate;
-
- /* second best throughput rate */
- unsigned int max_tp_rate2;
-
- /* best probability rate */
- unsigned int max_prob_rate;
- unsigned int max_prob_streams;
+ /* overall sorted rate set */
+ u8 max_tp_rate[MAX_THR_RATES];
+ u8 max_prob_rate;
/* time of last status update */
unsigned long stats_update;
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 3e7d793de0c3..a72ad46f2a04 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -46,8 +46,10 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
else
p += sprintf(p, "HT%c0/%cGI ", htmode, gimode);
- *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' ';
- *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' ';
+ *(p++) = (idx == mi->max_tp_rate[0]) ? 'A' : ' ';
+ *(p++) = (idx == mi->max_tp_rate[1]) ? 'B' : ' ';
+ *(p++) = (idx == mi->max_tp_rate[2]) ? 'C' : ' ';
+ *(p++) = (idx == mi->max_tp_rate[3]) ? 'D' : ' ';
*(p++) = (idx == mi->max_prob_rate) ? 'P' : ' ';
if (i == max_mcs) {
@@ -100,8 +102,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
file->private_data = ms;
p = ms->buf;
- p += sprintf(p, "type rate throughput ewma prob this prob "
- "retry this succ/attempt success attempts\n");
+ p += sprintf(p, "type rate throughput ewma prob "
+ "this prob retry this succ/attempt success attempts\n");
p = minstrel_ht_stats_dump(mi, max_mcs, p);
for (i = 0; i < max_mcs; i++)
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
deleted file mode 100644
index 19111c7bf454..000000000000
--- a/net/mac80211/rc80211_pid.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
- * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef RC80211_PID_H
-#define RC80211_PID_H
-
-/* Sampling period for measuring percentage of failed frames in ms. */
-#define RC_PID_INTERVAL 125
-
-/* Exponential averaging smoothness (used for I part of PID controller) */
-#define RC_PID_SMOOTHING_SHIFT 3
-#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
-
-/* Sharpening factor (used for D part of PID controller) */
-#define RC_PID_SHARPENING_FACTOR 0
-#define RC_PID_SHARPENING_DURATION 0
-
-/* Fixed point arithmetic shifting amount. */
-#define RC_PID_ARITH_SHIFT 8
-
-/* Proportional PID component coefficient. */
-#define RC_PID_COEFF_P 15
-/* Integral PID component coefficient. */
-#define RC_PID_COEFF_I 9
-/* Derivative PID component coefficient. */
-#define RC_PID_COEFF_D 15
-
-/* Target failed frames rate for the PID controller. NB: This effectively gives
- * maximum failed frames percentage we're willing to accept. If the wireless
- * link quality is good, the controller will fail to adjust failed frames
- * percentage to the target. This is intentional.
- */
-#define RC_PID_TARGET_PF 14
-
-/* Rate behaviour normalization quantity over time. */
-#define RC_PID_NORM_OFFSET 3
-
-/* Push high rates right after loading. */
-#define RC_PID_FAST_START 0
-
-/* Arithmetic right shift for positive and negative values for ISO C. */
-#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
- ((x) < 0 ? -((-(x)) >> (y)) : (x) >> (y))
-
-enum rc_pid_event_type {
- RC_PID_EVENT_TYPE_TX_STATUS,
- RC_PID_EVENT_TYPE_RATE_CHANGE,
- RC_PID_EVENT_TYPE_TX_RATE,
- RC_PID_EVENT_TYPE_PF_SAMPLE,
-};
-
-union rc_pid_event_data {
- /* RC_PID_EVENT_TX_STATUS */
- struct {
- u32 flags;
- struct ieee80211_tx_info tx_status;
- };
- /* RC_PID_EVENT_TYPE_RATE_CHANGE */
- /* RC_PID_EVENT_TYPE_TX_RATE */
- struct {
- int index;
- int rate;
- };
- /* RC_PID_EVENT_TYPE_PF_SAMPLE */
- struct {
- s32 pf_sample;
- s32 prop_err;
- s32 int_err;
- s32 der_err;
- };
-};
-
-struct rc_pid_event {
- /* The time when the event occurred */
- unsigned long timestamp;
-
- /* Event ID number */
- unsigned int id;
-
- /* Type of event */
- enum rc_pid_event_type type;
-
- /* type specific data */
- union rc_pid_event_data data;
-};
-
-/* Size of the event ring buffer. */
-#define RC_PID_EVENT_RING_SIZE 32
-
-struct rc_pid_event_buffer {
- /* Counter that generates event IDs */
- unsigned int ev_count;
-
- /* Ring buffer of events */
- struct rc_pid_event ring[RC_PID_EVENT_RING_SIZE];
-
- /* Index to the entry in events_buf to be reused */
- unsigned int next_entry;
-
- /* Lock that guards against concurrent access to this buffer struct */
- spinlock_t lock;
-
- /* Wait queue for poll/select and blocking I/O */
- wait_queue_head_t waitqueue;
-};
-
-struct rc_pid_events_file_info {
- /* The event buffer we read */
- struct rc_pid_event_buffer *events;
-
- /* The entry we have should read next */
- unsigned int next_entry;
-};
-
-/**
- * struct rc_pid_debugfs_entries - tunable parameters
- *
- * Algorithm parameters, tunable via debugfs.
- * @target: target percentage for failed frames
- * @sampling_period: error sampling interval in milliseconds
- * @coeff_p: absolute value of the proportional coefficient
- * @coeff_i: absolute value of the integral coefficient
- * @coeff_d: absolute value of the derivative coefficient
- * @smoothing_shift: absolute value of the integral smoothing factor (i.e.
- * amount of smoothing introduced by the exponential moving average)
- * @sharpen_factor: absolute value of the derivative sharpening factor (i.e.
- * amount of emphasis given to the derivative term after low activity
- * events)
- * @sharpen_duration: duration of the sharpening effect after the detected low
- * activity event, relative to sampling_period
- * @norm_offset: amount of normalization periodically performed on the learnt
- * rate behaviour values (lower means we should trust more what we learnt
- * about behaviour of rates, higher means we should trust more the natural
- * ordering of rates)
- */
-struct rc_pid_debugfs_entries {
- struct dentry *target;
- struct dentry *sampling_period;
- struct dentry *coeff_p;
- struct dentry *coeff_i;
- struct dentry *coeff_d;
- struct dentry *smoothing_shift;
- struct dentry *sharpen_factor;
- struct dentry *sharpen_duration;
- struct dentry *norm_offset;
-};
-
-void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
- struct ieee80211_tx_info *stat);
-
-void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
- int index, int rate);
-
-void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
- int index, int rate);
-
-void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
- s32 pf_sample, s32 prop_err,
- s32 int_err, s32 der_err);
-
-void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
- struct dentry *dir);
-
-void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta);
-
-struct rc_pid_sta_info {
- unsigned long last_change;
- unsigned long last_sample;
-
- u32 tx_num_failed;
- u32 tx_num_xmit;
-
- int txrate_idx;
-
- /* Average failed frames percentage error (i.e. actual vs. target
- * percentage), scaled by RC_PID_SMOOTHING. This value is computed
- * using using an exponential weighted average technique:
- *
- * (RC_PID_SMOOTHING - 1) * err_avg_old + err
- * err_avg = ------------------------------------------
- * RC_PID_SMOOTHING
- *
- * where err_avg is the new approximation, err_avg_old the previous one
- * and err is the error w.r.t. to the current failed frames percentage
- * sample. Note that the bigger RC_PID_SMOOTHING the more weight is
- * given to the previous estimate, resulting in smoother behavior (i.e.
- * corresponding to a longer integration window).
- *
- * For computation, we actually don't use the above formula, but this
- * one:
- *
- * err_avg_scaled = err_avg_old_scaled - err_avg_old + err
- *
- * where:
- * err_avg_scaled = err * RC_PID_SMOOTHING
- * err_avg_old_scaled = err_avg_old * RC_PID_SMOOTHING
- *
- * This avoids floating point numbers and the per_failed_old value can
- * easily be obtained by shifting per_failed_old_scaled right by
- * RC_PID_SMOOTHING_SHIFT.
- */
- s32 err_avg_sc;
-
- /* Last framed failes percentage sample. */
- u32 last_pf;
-
- /* Sharpening needed. */
- u8 sharp_cnt;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- /* Event buffer */
- struct rc_pid_event_buffer events;
-
- /* Events debugfs file entry */
- struct dentry *events_entry;
-#endif
-};
-
-/* Algorithm parameters. We keep them on a per-algorithm approach, so they can
- * be tuned individually for each interface.
- */
-struct rc_pid_rateinfo {
-
- /* Map sorted rates to rates in ieee80211_hw_mode. */
- int index;
-
- /* Map rates in ieee80211_hw_mode to sorted rates. */
- int rev_index;
-
- /* Did we do any measurement on this rate? */
- bool valid;
-
- /* Comparison with the lowest rate. */
- int diff;
-};
-
-struct rc_pid_info {
-
- /* The failed frames percentage target. */
- unsigned int target;
-
- /* Rate at which failed frames percentage is sampled in 0.001s. */
- unsigned int sampling_period;
-
- /* P, I and D coefficients. */
- int coeff_p;
- int coeff_i;
- int coeff_d;
-
- /* Exponential averaging shift. */
- unsigned int smoothing_shift;
-
- /* Sharpening factor and duration. */
- unsigned int sharpen_factor;
- unsigned int sharpen_duration;
-
- /* Normalization offset. */
- unsigned int norm_offset;
-
- /* Rates information. */
- struct rc_pid_rateinfo *rinfo;
-
- /* Index of the last used rate. */
- int oldrate;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- /* Debugfs entries created for the parameters above. */
- struct rc_pid_debugfs_entries dentries;
-#endif
-};
-
-#endif /* RC80211_PID_H */
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
deleted file mode 100644
index d0da2a70fe68..000000000000
--- a/net/mac80211/rc80211_pid_algo.c
+++ /dev/null
@@ -1,478 +0,0 @@
-/*
- * Copyright 2002-2005, Instant802 Networks, Inc.
- * Copyright 2005, Devicescape Software, Inc.
- * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
- * Copyright 2007-2008, Stefano Brivio <stefano.brivio@polimi.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/netdevice.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-#include <net/mac80211.h>
-#include "rate.h"
-#include "mesh.h"
-#include "rc80211_pid.h"
-
-
-/* This is an implementation of a TX rate control algorithm that uses a PID
- * controller. Given a target failed frames rate, the controller decides about
- * TX rate changes to meet the target failed frames rate.
- *
- * The controller basically computes the following:
- *
- * adj = CP * err + CI * err_avg + CD * (err - last_err) * (1 + sharpening)
- *
- * where
- * adj adjustment value that is used to switch TX rate (see below)
- * err current error: target vs. current failed frames percentage
- * last_err last error
- * err_avg average (i.e. poor man's integral) of recent errors
- * sharpening non-zero when fast response is needed (i.e. right after
- * association or no frames sent for a long time), heading
- * to zero over time
- * CP Proportional coefficient
- * CI Integral coefficient
- * CD Derivative coefficient
- *
- * CP, CI, CD are subject to careful tuning.
- *
- * The integral component uses a exponential moving average approach instead of
- * an actual sliding window. The advantage is that we don't need to keep an
- * array of the last N error values and computation is easier.
- *
- * Once we have the adj value, we map it to a rate by means of a learning
- * algorithm. This algorithm keeps the state of the percentual failed frames
- * difference between rates. The behaviour of the lowest available rate is kept
- * as a reference value, and every time we switch between two rates, we compute
- * the difference between the failed frames each rate exhibited. By doing so,
- * we compare behaviours which different rates exhibited in adjacent timeslices,
- * thus the comparison is minimally affected by external conditions. This
- * difference gets propagated to the whole set of measurements, so that the
- * reference is always the same. Periodically, we normalize this set so that
- * recent events weigh the most. By comparing the adj value with this set, we
- * avoid pejorative switches to lower rates and allow for switches to higher
- * rates if they behaved well.
- *
- * Note that for the computations we use a fixed-point representation to avoid
- * floating point arithmetic. Hence, all values are shifted left by
- * RC_PID_ARITH_SHIFT.
- */
-
-
-/* Adjust the rate while ensuring that we won't switch to a lower rate if it
- * exhibited a worse failed frames behaviour and we'll choose the highest rate
- * whose failed frames behaviour is not worse than the one of the original rate
- * target. While at it, check that the new rate is valid. */
-static void rate_control_pid_adjust_rate(struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta,
- struct rc_pid_sta_info *spinfo, int adj,
- struct rc_pid_rateinfo *rinfo)
-{
- int cur_sorted, new_sorted, probe, tmp, n_bitrates, band;
- int cur = spinfo->txrate_idx;
-
- band = sband->band;
- n_bitrates = sband->n_bitrates;
-
- /* Map passed arguments to sorted values. */
- cur_sorted = rinfo[cur].rev_index;
- new_sorted = cur_sorted + adj;
-
- /* Check limits. */
- if (new_sorted < 0)
- new_sorted = rinfo[0].rev_index;
- else if (new_sorted >= n_bitrates)
- new_sorted = rinfo[n_bitrates - 1].rev_index;
-
- tmp = new_sorted;
-
- if (adj < 0) {
- /* Ensure that the rate decrease isn't disadvantageous. */
- for (probe = cur_sorted; probe >= new_sorted; probe--)
- if (rinfo[probe].diff <= rinfo[cur_sorted].diff &&
- rate_supported(sta, band, rinfo[probe].index))
- tmp = probe;
- } else {
- /* Look for rate increase with zero (or below) cost. */
- for (probe = new_sorted + 1; probe < n_bitrates; probe++)
- if (rinfo[probe].diff <= rinfo[new_sorted].diff &&
- rate_supported(sta, band, rinfo[probe].index))
- tmp = probe;
- }
-
- /* Fit the rate found to the nearest supported rate. */
- do {
- if (rate_supported(sta, band, rinfo[tmp].index)) {
- spinfo->txrate_idx = rinfo[tmp].index;
- break;
- }
- if (adj < 0)
- tmp--;
- else
- tmp++;
- } while (tmp < n_bitrates && tmp >= 0);
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- rate_control_pid_event_rate_change(&spinfo->events,
- spinfo->txrate_idx,
- sband->bitrates[spinfo->txrate_idx].bitrate);
-#endif
-}
-
-/* Normalize the failed frames per-rate differences. */
-static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l)
-{
- int i, norm_offset = pinfo->norm_offset;
- struct rc_pid_rateinfo *r = pinfo->rinfo;
-
- if (r[0].diff > norm_offset)
- r[0].diff -= norm_offset;
- else if (r[0].diff < -norm_offset)
- r[0].diff += norm_offset;
- for (i = 0; i < l - 1; i++)
- if (r[i + 1].diff > r[i].diff + norm_offset)
- r[i + 1].diff -= norm_offset;
- else if (r[i + 1].diff <= r[i].diff)
- r[i + 1].diff += norm_offset;
-}
-
-static void rate_control_pid_sample(struct rc_pid_info *pinfo,
- struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta,
- struct rc_pid_sta_info *spinfo)
-{
- struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
- u32 pf;
- s32 err_avg;
- u32 err_prop;
- u32 err_int;
- u32 err_der;
- int adj, i, j, tmp;
- unsigned long period;
-
- /* In case nothing happened during the previous control interval, turn
- * the sharpening factor on. */
- period = msecs_to_jiffies(pinfo->sampling_period);
- if (jiffies - spinfo->last_sample > 2 * period)
- spinfo->sharp_cnt = pinfo->sharpen_duration;
-
- spinfo->last_sample = jiffies;
-
- /* This should never happen, but in case, we assume the old sample is
- * still a good measurement and copy it. */
- if (unlikely(spinfo->tx_num_xmit == 0))
- pf = spinfo->last_pf;
- else
- pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
-
- spinfo->tx_num_xmit = 0;
- spinfo->tx_num_failed = 0;
-
- /* If we just switched rate, update the rate behaviour info. */
- if (pinfo->oldrate != spinfo->txrate_idx) {
-
- i = rinfo[pinfo->oldrate].rev_index;
- j = rinfo[spinfo->txrate_idx].rev_index;
-
- tmp = (pf - spinfo->last_pf);
- tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT);
-
- rinfo[j].diff = rinfo[i].diff + tmp;
- pinfo->oldrate = spinfo->txrate_idx;
- }
- rate_control_pid_normalize(pinfo, sband->n_bitrates);
-
- /* Compute the proportional, integral and derivative errors. */
- err_prop = (pinfo->target - pf) << RC_PID_ARITH_SHIFT;
-
- err_avg = spinfo->err_avg_sc >> pinfo->smoothing_shift;
- spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
- err_int = spinfo->err_avg_sc >> pinfo->smoothing_shift;
-
- err_der = (pf - spinfo->last_pf) *
- (1 + pinfo->sharpen_factor * spinfo->sharp_cnt);
- spinfo->last_pf = pf;
- if (spinfo->sharp_cnt)
- spinfo->sharp_cnt--;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- rate_control_pid_event_pf_sample(&spinfo->events, pf, err_prop, err_int,
- err_der);
-#endif
-
- /* Compute the controller output. */
- adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
- + err_der * pinfo->coeff_d);
- adj = RC_PID_DO_ARITH_RIGHT_SHIFT(adj, 2 * RC_PID_ARITH_SHIFT);
-
- /* Change rate. */
- if (adj)
- rate_control_pid_adjust_rate(sband, sta, spinfo, adj, rinfo);
-}
-
-static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta, void *priv_sta,
- struct sk_buff *skb)
-{
- struct rc_pid_info *pinfo = priv;
- struct rc_pid_sta_info *spinfo = priv_sta;
- unsigned long period;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
- if (!spinfo)
- return;
-
- /* Ignore all frames that were sent with a different rate than the rate
- * we currently advise mac80211 to use. */
- if (info->status.rates[0].idx != spinfo->txrate_idx)
- return;
-
- spinfo->tx_num_xmit++;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- rate_control_pid_event_tx_status(&spinfo->events, info);
-#endif
-
- /* We count frames that totally failed to be transmitted as two bad
- * frames, those that made it out but had some retries as one good and
- * one bad frame. */
- if (!(info->flags & IEEE80211_TX_STAT_ACK)) {
- spinfo->tx_num_failed += 2;
- spinfo->tx_num_xmit++;
- } else if (info->status.rates[0].count > 1) {
- spinfo->tx_num_failed++;
- spinfo->tx_num_xmit++;
- }
-
- /* Update PID controller state. */
- period = msecs_to_jiffies(pinfo->sampling_period);
- if (time_after(jiffies, spinfo->last_sample + period))
- rate_control_pid_sample(pinfo, sband, sta, spinfo);
-}
-
-static void
-rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta,
- void *priv_sta,
- struct ieee80211_tx_rate_control *txrc)
-{
- struct sk_buff *skb = txrc->skb;
- struct ieee80211_supported_band *sband = txrc->sband;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct rc_pid_sta_info *spinfo = priv_sta;
- int rateidx;
-
- if (txrc->rts)
- info->control.rates[0].count =
- txrc->hw->conf.long_frame_max_tx_count;
- else
- info->control.rates[0].count =
- txrc->hw->conf.short_frame_max_tx_count;
-
- /* Send management frames and NO_ACK data using lowest rate. */
- if (rate_control_send_low(sta, priv_sta, txrc))
- return;
-
- rateidx = spinfo->txrate_idx;
-
- if (rateidx >= sband->n_bitrates)
- rateidx = sband->n_bitrates - 1;
-
- info->control.rates[0].idx = rateidx;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- rate_control_pid_event_tx_rate(&spinfo->events,
- rateidx, sband->bitrates[rateidx].bitrate);
-#endif
-}
-
-static void
-rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
- struct cfg80211_chan_def *chandef,
- struct ieee80211_sta *sta, void *priv_sta)
-{
- struct rc_pid_sta_info *spinfo = priv_sta;
- struct rc_pid_info *pinfo = priv;
- struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
- int i, j, tmp;
- bool s;
-
- /* TODO: This routine should consider using RSSI from previous packets
- * as we need to have IEEE 802.1X auth succeed immediately after assoc..
- * Until that method is implemented, we will use the lowest supported
- * rate as a workaround. */
-
- /* Sort the rates. This is optimized for the most common case (i.e.
- * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
- * mapping too. */
- for (i = 0; i < sband->n_bitrates; i++) {
- rinfo[i].index = i;
- rinfo[i].rev_index = i;
- if (RC_PID_FAST_START)
- rinfo[i].diff = 0;
- else
- rinfo[i].diff = i * pinfo->norm_offset;
- }
- for (i = 1; i < sband->n_bitrates; i++) {
- s = false;
- for (j = 0; j < sband->n_bitrates - i; j++)
- if (unlikely(sband->bitrates[rinfo[j].index].bitrate >
- sband->bitrates[rinfo[j + 1].index].bitrate)) {
- tmp = rinfo[j].index;
- rinfo[j].index = rinfo[j + 1].index;
- rinfo[j + 1].index = tmp;
- rinfo[rinfo[j].index].rev_index = j;
- rinfo[rinfo[j + 1].index].rev_index = j + 1;
- s = true;
- }
- if (!s)
- break;
- }
-
- spinfo->txrate_idx = rate_lowest_index(sband, sta);
-}
-
-static void *rate_control_pid_alloc(struct ieee80211_hw *hw,
- struct dentry *debugfsdir)
-{
- struct rc_pid_info *pinfo;
- struct rc_pid_rateinfo *rinfo;
- struct ieee80211_supported_band *sband;
- int i, max_rates = 0;
-#ifdef CONFIG_MAC80211_DEBUGFS
- struct rc_pid_debugfs_entries *de;
-#endif
-
- pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
- if (!pinfo)
- return NULL;
-
- for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
- sband = hw->wiphy->bands[i];
- if (sband && sband->n_bitrates > max_rates)
- max_rates = sband->n_bitrates;
- }
-
- rinfo = kmalloc(sizeof(*rinfo) * max_rates, GFP_ATOMIC);
- if (!rinfo) {
- kfree(pinfo);
- return NULL;
- }
-
- pinfo->target = RC_PID_TARGET_PF;
- pinfo->sampling_period = RC_PID_INTERVAL;
- pinfo->coeff_p = RC_PID_COEFF_P;
- pinfo->coeff_i = RC_PID_COEFF_I;
- pinfo->coeff_d = RC_PID_COEFF_D;
- pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
- pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
- pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
- pinfo->norm_offset = RC_PID_NORM_OFFSET;
- pinfo->rinfo = rinfo;
- pinfo->oldrate = 0;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- de = &pinfo->dentries;
- de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR,
- debugfsdir, &pinfo->target);
- de->sampling_period = debugfs_create_u32("sampling_period",
- S_IRUSR | S_IWUSR, debugfsdir,
- &pinfo->sampling_period);
- de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR,
- debugfsdir, (u32 *)&pinfo->coeff_p);
- de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR,
- debugfsdir, (u32 *)&pinfo->coeff_i);
- de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR,
- debugfsdir, (u32 *)&pinfo->coeff_d);
- de->smoothing_shift = debugfs_create_u32("smoothing_shift",
- S_IRUSR | S_IWUSR, debugfsdir,
- &pinfo->smoothing_shift);
- de->sharpen_factor = debugfs_create_u32("sharpen_factor",
- S_IRUSR | S_IWUSR, debugfsdir,
- &pinfo->sharpen_factor);
- de->sharpen_duration = debugfs_create_u32("sharpen_duration",
- S_IRUSR | S_IWUSR, debugfsdir,
- &pinfo->sharpen_duration);
- de->norm_offset = debugfs_create_u32("norm_offset",
- S_IRUSR | S_IWUSR, debugfsdir,
- &pinfo->norm_offset);
-#endif
-
- return pinfo;
-}
-
-static void rate_control_pid_free(void *priv)
-{
- struct rc_pid_info *pinfo = priv;
-#ifdef CONFIG_MAC80211_DEBUGFS
- struct rc_pid_debugfs_entries *de = &pinfo->dentries;
-
- debugfs_remove(de->norm_offset);
- debugfs_remove(de->sharpen_duration);
- debugfs_remove(de->sharpen_factor);
- debugfs_remove(de->smoothing_shift);
- debugfs_remove(de->coeff_d);
- debugfs_remove(de->coeff_i);
- debugfs_remove(de->coeff_p);
- debugfs_remove(de->sampling_period);
- debugfs_remove(de->target);
-#endif
-
- kfree(pinfo->rinfo);
- kfree(pinfo);
-}
-
-static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta,
- gfp_t gfp)
-{
- struct rc_pid_sta_info *spinfo;
-
- spinfo = kzalloc(sizeof(*spinfo), gfp);
- if (spinfo == NULL)
- return NULL;
-
- spinfo->last_sample = jiffies;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- spin_lock_init(&spinfo->events.lock);
- init_waitqueue_head(&spinfo->events.waitqueue);
-#endif
-
- return spinfo;
-}
-
-static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta,
- void *priv_sta)
-{
- kfree(priv_sta);
-}
-
-static const struct rate_control_ops mac80211_rcpid = {
- .name = "pid",
- .tx_status = rate_control_pid_tx_status,
- .get_rate = rate_control_pid_get_rate,
- .rate_init = rate_control_pid_rate_init,
- .alloc = rate_control_pid_alloc,
- .free = rate_control_pid_free,
- .alloc_sta = rate_control_pid_alloc_sta,
- .free_sta = rate_control_pid_free_sta,
-#ifdef CONFIG_MAC80211_DEBUGFS
- .add_sta_debugfs = rate_control_pid_add_sta_debugfs,
- .remove_sta_debugfs = rate_control_pid_remove_sta_debugfs,
-#endif
-};
-
-int __init rc80211_pid_init(void)
-{
- return ieee80211_rate_control_register(&mac80211_rcpid);
-}
-
-void rc80211_pid_exit(void)
-{
- ieee80211_rate_control_unregister(&mac80211_rcpid);
-}
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
deleted file mode 100644
index 6ff134650a84..000000000000
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/poll.h>
-#include <linux/netdevice.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include <net/mac80211.h>
-#include "rate.h"
-
-#include "rc80211_pid.h"
-
-static void rate_control_pid_event(struct rc_pid_event_buffer *buf,
- enum rc_pid_event_type type,
- union rc_pid_event_data *data)
-{
- struct rc_pid_event *ev;
- unsigned long status;
-
- spin_lock_irqsave(&buf->lock, status);
- ev = &(buf->ring[buf->next_entry]);
- buf->next_entry = (buf->next_entry + 1) % RC_PID_EVENT_RING_SIZE;
-
- ev->timestamp = jiffies;
- ev->id = buf->ev_count++;
- ev->type = type;
- ev->data = *data;
-
- spin_unlock_irqrestore(&buf->lock, status);
-
- wake_up_all(&buf->waitqueue);
-}
-
-void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
- struct ieee80211_tx_info *stat)
-{
- union rc_pid_event_data evd;
-
- evd.flags = stat->flags;
- memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info));
- rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd);
-}
-
-void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
- int index, int rate)
-{
- union rc_pid_event_data evd;
-
- evd.index = index;
- evd.rate = rate;
- rate_control_pid_event(buf, RC_PID_EVENT_TYPE_RATE_CHANGE, &evd);
-}
-
-void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
- int index, int rate)
-{
- union rc_pid_event_data evd;
-
- evd.index = index;
- evd.rate = rate;
- rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_RATE, &evd);
-}
-
-void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
- s32 pf_sample, s32 prop_err,
- s32 int_err, s32 der_err)
-{
- union rc_pid_event_data evd;
-
- evd.pf_sample = pf_sample;
- evd.prop_err = prop_err;
- evd.int_err = int_err;
- evd.der_err = der_err;
- rate_control_pid_event(buf, RC_PID_EVENT_TYPE_PF_SAMPLE, &evd);
-}
-
-static int rate_control_pid_events_open(struct inode *inode, struct file *file)
-{
- struct rc_pid_sta_info *sinfo = inode->i_private;
- struct rc_pid_event_buffer *events = &sinfo->events;
- struct rc_pid_events_file_info *file_info;
- unsigned long status;
-
- /* Allocate a state struct */
- file_info = kmalloc(sizeof(*file_info), GFP_KERNEL);
- if (file_info == NULL)
- return -ENOMEM;
-
- spin_lock_irqsave(&events->lock, status);
-
- file_info->next_entry = events->next_entry;
- file_info->events = events;
-
- spin_unlock_irqrestore(&events->lock, status);
-
- file->private_data = file_info;
-
- return 0;
-}
-
-static int rate_control_pid_events_release(struct inode *inode,
- struct file *file)
-{
- struct rc_pid_events_file_info *file_info = file->private_data;
-
- kfree(file_info);
-
- return 0;
-}
-
-static unsigned int rate_control_pid_events_poll(struct file *file,
- poll_table *wait)
-{
- struct rc_pid_events_file_info *file_info = file->private_data;
-
- poll_wait(file, &file_info->events->waitqueue, wait);
-
- return POLLIN | POLLRDNORM;
-}
-
-#define RC_PID_PRINT_BUF_SIZE 64
-
-static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
- size_t length, loff_t *offset)
-{
- struct rc_pid_events_file_info *file_info = file->private_data;
- struct rc_pid_event_buffer *events = file_info->events;
- struct rc_pid_event *ev;
- char pb[RC_PID_PRINT_BUF_SIZE];
- int ret;
- int p;
- unsigned long status;
-
- /* Check if there is something to read. */
- if (events->next_entry == file_info->next_entry) {
- if (file->f_flags & O_NONBLOCK)
- return -EAGAIN;
-
- /* Wait */
- ret = wait_event_interruptible(events->waitqueue,
- events->next_entry != file_info->next_entry);
-
- if (ret)
- return ret;
- }
-
- /* Write out one event per call. I don't care whether it's a little
- * inefficient, this is debugging code anyway. */
- spin_lock_irqsave(&events->lock, status);
-
- /* Get an event */
- ev = &(events->ring[file_info->next_entry]);
- file_info->next_entry = (file_info->next_entry + 1) %
- RC_PID_EVENT_RING_SIZE;
-
- /* Print information about the event. Note that userspace needs to
- * provide large enough buffers. */
- length = length < RC_PID_PRINT_BUF_SIZE ?
- length : RC_PID_PRINT_BUF_SIZE;
- p = scnprintf(pb, length, "%u %lu ", ev->id, ev->timestamp);
- switch (ev->type) {
- case RC_PID_EVENT_TYPE_TX_STATUS:
- p += scnprintf(pb + p, length - p, "tx_status %u %u",
- !(ev->data.flags & IEEE80211_TX_STAT_ACK),
- ev->data.tx_status.status.rates[0].idx);
- break;
- case RC_PID_EVENT_TYPE_RATE_CHANGE:
- p += scnprintf(pb + p, length - p, "rate_change %d %d",
- ev->data.index, ev->data.rate);
- break;
- case RC_PID_EVENT_TYPE_TX_RATE:
- p += scnprintf(pb + p, length - p, "tx_rate %d %d",
- ev->data.index, ev->data.rate);
- break;
- case RC_PID_EVENT_TYPE_PF_SAMPLE:
- p += scnprintf(pb + p, length - p,
- "pf_sample %d %d %d %d",
- ev->data.pf_sample, ev->data.prop_err,
- ev->data.int_err, ev->data.der_err);
- break;
- }
- p += scnprintf(pb + p, length - p, "\n");
-
- spin_unlock_irqrestore(&events->lock, status);
-
- if (copy_to_user(buf, pb, p))
- return -EFAULT;
-
- return p;
-}
-
-#undef RC_PID_PRINT_BUF_SIZE
-
-static const struct file_operations rc_pid_fop_events = {
- .owner = THIS_MODULE,
- .read = rate_control_pid_events_read,
- .poll = rate_control_pid_events_poll,
- .open = rate_control_pid_events_open,
- .release = rate_control_pid_events_release,
- .llseek = noop_llseek,
-};
-
-void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
- struct dentry *dir)
-{
- struct rc_pid_sta_info *spinfo = priv_sta;
-
- spinfo->events_entry = debugfs_create_file("rc_pid_events", S_IRUGO,
- dir, spinfo,
- &rc_pid_fop_events);
-}
-
-void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta)
-{
- struct rc_pid_sta_info *spinfo = priv_sta;
-
- debugfs_remove(spinfo->events_entry);
-}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 394e201cde6d..b04ca4049c95 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3,6 +3,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -688,20 +689,27 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
int index,
struct sk_buff_head *frames)
{
- struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
+ struct sk_buff_head *skb_list = &tid_agg_rx->reorder_buf[index];
+ struct sk_buff *skb;
struct ieee80211_rx_status *status;
lockdep_assert_held(&tid_agg_rx->reorder_lock);
- if (!skb)
+ if (skb_queue_empty(skb_list))
+ goto no_frame;
+
+ if (!ieee80211_rx_reorder_ready(skb_list)) {
+ __skb_queue_purge(skb_list);
goto no_frame;
+ }
- /* release the frame from the reorder ring buffer */
+ /* release frames from the reorder ring buffer */
tid_agg_rx->stored_mpdu_num--;
- tid_agg_rx->reorder_buf[index] = NULL;
- status = IEEE80211_SKB_RXCB(skb);
- status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE;
- __skb_queue_tail(frames, skb);
+ while ((skb = __skb_dequeue(skb_list))) {
+ status = IEEE80211_SKB_RXCB(skb);
+ status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE;
+ __skb_queue_tail(frames, skb);
+ }
no_frame:
tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
@@ -738,13 +746,13 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
struct tid_ampdu_rx *tid_agg_rx,
struct sk_buff_head *frames)
{
- int index, j;
+ int index, i, j;
lockdep_assert_held(&tid_agg_rx->reorder_lock);
/* release the buffer until next missing frame */
index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size;
- if (!tid_agg_rx->reorder_buf[index] &&
+ if (!ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index]) &&
tid_agg_rx->stored_mpdu_num) {
/*
* No buffers ready to be released, but check whether any
@@ -753,7 +761,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
int skipped = 1;
for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
j = (j + 1) % tid_agg_rx->buf_size) {
- if (!tid_agg_rx->reorder_buf[j]) {
+ if (!ieee80211_rx_reorder_ready(
+ &tid_agg_rx->reorder_buf[j])) {
skipped++;
continue;
}
@@ -762,6 +771,11 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
HT_RX_REORDER_BUF_TIMEOUT))
goto set_release_timer;
+ /* don't leave incomplete A-MSDUs around */
+ for (i = (index + 1) % tid_agg_rx->buf_size; i != j;
+ i = (i + 1) % tid_agg_rx->buf_size)
+ __skb_queue_purge(&tid_agg_rx->reorder_buf[i]);
+
ht_dbg_ratelimited(sdata,
"release an RX reorder frame due to timeout on earlier frames\n");
ieee80211_release_reorder_frame(sdata, tid_agg_rx, j,
@@ -775,7 +789,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
skipped) & IEEE80211_SN_MASK;
skipped = 0;
}
- } else while (tid_agg_rx->reorder_buf[index]) {
+ } else while (ieee80211_rx_reorder_ready(
+ &tid_agg_rx->reorder_buf[index])) {
ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
frames);
index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size;
@@ -786,7 +801,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
for (; j != (index - 1) % tid_agg_rx->buf_size;
j = (j + 1) % tid_agg_rx->buf_size) {
- if (tid_agg_rx->reorder_buf[j])
+ if (ieee80211_rx_reorder_ready(
+ &tid_agg_rx->reorder_buf[j]))
break;
}
@@ -811,6 +827,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
struct sk_buff_head *frames)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
u16 sc = le16_to_cpu(hdr->seq_ctrl);
u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
u16 head_seq_num, buf_size;
@@ -819,6 +836,16 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
spin_lock(&tid_agg_rx->reorder_lock);
+ /*
+ * Offloaded BA sessions have no known starting sequence number so pick
+ * one from first Rxed frame for this tid after BA was started.
+ */
+ if (unlikely(tid_agg_rx->auto_seq)) {
+ tid_agg_rx->auto_seq = false;
+ tid_agg_rx->ssn = mpdu_seq_num;
+ tid_agg_rx->head_seq_num = mpdu_seq_num;
+ }
+
buf_size = tid_agg_rx->buf_size;
head_seq_num = tid_agg_rx->head_seq_num;
@@ -845,7 +872,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
index = mpdu_seq_num % tid_agg_rx->buf_size;
/* check if we already stored this frame */
- if (tid_agg_rx->reorder_buf[index]) {
+ if (ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index])) {
dev_kfree_skb(skb);
goto out;
}
@@ -858,17 +885,20 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
*/
if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
tid_agg_rx->stored_mpdu_num == 0) {
- tid_agg_rx->head_seq_num =
- ieee80211_sn_inc(tid_agg_rx->head_seq_num);
+ if (!(status->flag & RX_FLAG_AMSDU_MORE))
+ tid_agg_rx->head_seq_num =
+ ieee80211_sn_inc(tid_agg_rx->head_seq_num);
ret = false;
goto out;
}
/* put the frame in the reordering buffer */
- tid_agg_rx->reorder_buf[index] = skb;
- tid_agg_rx->reorder_time[index] = jiffies;
- tid_agg_rx->stored_mpdu_num++;
- ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames);
+ __skb_queue_tail(&tid_agg_rx->reorder_buf[index], skb);
+ if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
+ tid_agg_rx->reorder_time[index] = jiffies;
+ tid_agg_rx->stored_mpdu_num++;
+ ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames);
+ }
out:
spin_unlock(&tid_agg_rx->reorder_lock);
@@ -1107,6 +1137,8 @@ static void sta_ps_end(struct sta_info *sta)
return;
}
+ set_sta_flag(sta, WLAN_STA_PS_DELIVER);
+ clear_sta_flag(sta, WLAN_STA_PS_STA);
ieee80211_sta_ps_deliver_wakeup(sta);
}
@@ -2704,7 +2736,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
sig = status->signal;
if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
- rx->skb->data, rx->skb->len, 0, GFP_ATOMIC)) {
+ rx->skb->data, rx->skb->len, 0)) {
if (rx->sta)
rx->sta->rx_packets++;
dev_kfree_skb(rx->skb);
@@ -3127,6 +3159,14 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
if (!ieee80211_is_beacon(hdr->frame_control))
return false;
status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
+ } else if (!ieee80211_has_tods(hdr->frame_control)) {
+ /* ignore data frames to TDLS-peers */
+ if (ieee80211_is_data(hdr->frame_control))
+ return false;
+ /* ignore action frames to TDLS-peers */
+ if (ieee80211_is_action(hdr->frame_control) &&
+ !ether_addr_equal(bssid, hdr->addr1))
+ return false;
}
break;
case NL80211_IFTYPE_WDS:
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f40661eb75b5..af0d094b2f2f 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -6,6 +6,7 @@
* Copyright 2005, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -235,38 +236,51 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
{
struct cfg80211_scan_request *req = local->scan_req;
struct cfg80211_chan_def chandef;
- enum ieee80211_band band;
+ u8 bands_used = 0;
int i, ielen, n_chans;
if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
return false;
- do {
- if (local->hw_scan_band == IEEE80211_NUM_BANDS)
- return false;
-
- band = local->hw_scan_band;
- n_chans = 0;
+ if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) {
for (i = 0; i < req->n_channels; i++) {
- if (req->channels[i]->band == band) {
- local->hw_scan_req->channels[n_chans] =
+ local->hw_scan_req->req.channels[i] = req->channels[i];
+ bands_used |= BIT(req->channels[i]->band);
+ }
+
+ n_chans = req->n_channels;
+ } else {
+ do {
+ if (local->hw_scan_band == IEEE80211_NUM_BANDS)
+ return false;
+
+ n_chans = 0;
+
+ for (i = 0; i < req->n_channels; i++) {
+ if (req->channels[i]->band !=
+ local->hw_scan_band)
+ continue;
+ local->hw_scan_req->req.channels[n_chans] =
req->channels[i];
n_chans++;
+ bands_used |= BIT(req->channels[i]->band);
}
- }
- local->hw_scan_band++;
- } while (!n_chans);
+ local->hw_scan_band++;
+ } while (!n_chans);
+ }
- local->hw_scan_req->n_channels = n_chans;
+ local->hw_scan_req->req.n_channels = n_chans;
ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
- ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie,
+ ielen = ieee80211_build_preq_ies(local,
+ (u8 *)local->hw_scan_req->req.ie,
local->hw_scan_ies_bufsize,
- req->ie, req->ie_len, band,
- req->rates[band], &chandef);
- local->hw_scan_req->ie_len = ielen;
- local->hw_scan_req->no_cck = req->no_cck;
+ &local->hw_scan_req->ies,
+ req->ie, req->ie_len,
+ bands_used, req->rates, &chandef);
+ local->hw_scan_req->req.ie_len = ielen;
+ local->hw_scan_req->req.no_cck = req->no_cck;
return true;
}
@@ -291,7 +305,9 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
if (WARN_ON(!local->scan_req))
return;
- if (hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
+ if (hw_scan && !aborted &&
+ !(local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) &&
+ ieee80211_prep_hw_scan(local)) {
int rc;
rc = drv_hw_scan(local,
@@ -473,6 +489,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
u8 *ies;
local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len;
+
+ if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) {
+ int i, n_bands = 0;
+ u8 bands_counted = 0;
+
+ for (i = 0; i < req->n_channels; i++) {
+ if (bands_counted & BIT(req->channels[i]->band))
+ continue;
+ bands_counted |= BIT(req->channels[i]->band);
+ n_bands++;
+ }
+
+ local->hw_scan_ies_bufsize *= n_bands;
+ }
+
local->hw_scan_req = kmalloc(
sizeof(*local->hw_scan_req) +
req->n_channels * sizeof(req->channels[0]) +
@@ -480,13 +511,13 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (!local->hw_scan_req)
return -ENOMEM;
- local->hw_scan_req->ssids = req->ssids;
- local->hw_scan_req->n_ssids = req->n_ssids;
+ local->hw_scan_req->req.ssids = req->ssids;
+ local->hw_scan_req->req.n_ssids = req->n_ssids;
ies = (u8 *)local->hw_scan_req +
sizeof(*local->hw_scan_req) +
req->n_channels * sizeof(req->channels[0]);
- local->hw_scan_req->ie = ies;
- local->hw_scan_req->flags = req->flags;
+ local->hw_scan_req->req.ie = ies;
+ local->hw_scan_req->req.flags = req->flags;
local->hw_scan_band = 0;
@@ -973,9 +1004,13 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct cfg80211_sched_scan_request *req)
{
struct ieee80211_local *local = sdata->local;
- struct ieee80211_sched_scan_ies sched_scan_ies = {};
+ struct ieee80211_scan_ies sched_scan_ies = {};
struct cfg80211_chan_def chandef;
- int ret, i, iebufsz;
+ int ret, i, iebufsz, num_bands = 0;
+ u32 rate_masks[IEEE80211_NUM_BANDS] = {};
+ u8 bands_used = 0;
+ u8 *ie;
+ size_t len;
iebufsz = local->scan_ies_len + req->ie_len;
@@ -985,33 +1020,35 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
return -ENOTSUPP;
for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
- if (!local->hw.wiphy->bands[i])
- continue;
-
- sched_scan_ies.ie[i] = kzalloc(iebufsz, GFP_KERNEL);
- if (!sched_scan_ies.ie[i]) {
- ret = -ENOMEM;
- goto out_free;
+ if (local->hw.wiphy->bands[i]) {
+ bands_used |= BIT(i);
+ rate_masks[i] = (u32) -1;
+ num_bands++;
}
+ }
- ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
-
- sched_scan_ies.len[i] =
- ieee80211_build_preq_ies(local, sched_scan_ies.ie[i],
- iebufsz, req->ie, req->ie_len,
- i, (u32) -1, &chandef);
+ ie = kzalloc(num_bands * iebufsz, GFP_KERNEL);
+ if (!ie) {
+ ret = -ENOMEM;
+ goto out;
}
+ ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
+
+ len = ieee80211_build_preq_ies(local, ie, num_bands * iebufsz,
+ &sched_scan_ies, req->ie,
+ req->ie_len, bands_used,
+ rate_masks, &chandef);
+
ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
if (ret == 0) {
rcu_assign_pointer(local->sched_scan_sdata, sdata);
local->sched_scan_req = req;
}
-out_free:
- while (i > 0)
- kfree(sched_scan_ies.ie[--i]);
+ kfree(ie);
+out:
if (ret) {
/* Clean in case of failure after HW restart or upon resume. */
RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
@@ -1058,7 +1095,7 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
if (rcu_access_pointer(local->sched_scan_sdata)) {
ret = drv_sched_scan_stop(local, sdata);
if (!ret)
- rcu_assign_pointer(local->sched_scan_sdata, NULL);
+ RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
}
out:
mutex_unlock(&local->mtx);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index a9b46d8ea22f..de494df3bab8 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1,6 +1,7 @@
/*
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -100,7 +101,8 @@ static void __cleanup_single_sta(struct sta_info *sta)
struct ps_data *ps;
if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
- test_sta_flag(sta, WLAN_STA_PS_DRIVER)) {
+ test_sta_flag(sta, WLAN_STA_PS_DRIVER) ||
+ test_sta_flag(sta, WLAN_STA_PS_DELIVER)) {
if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
ps = &sdata->bss->ps;
@@ -111,6 +113,7 @@ static void __cleanup_single_sta(struct sta_info *sta)
clear_sta_flag(sta, WLAN_STA_PS_STA);
clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ clear_sta_flag(sta, WLAN_STA_PS_DELIVER);
atomic_dec(&ps->num_sta_ps);
sta_info_recalc_tim(sta);
@@ -125,7 +128,7 @@ static void __cleanup_single_sta(struct sta_info *sta)
if (ieee80211_vif_is_mesh(&sdata->vif))
mesh_sta_cleanup(sta);
- cancel_work_sync(&sta->drv_unblock_wk);
+ cancel_work_sync(&sta->drv_deliver_wk);
/*
* Destroy aggregation state here. It would be nice to wait for the
@@ -253,33 +256,23 @@ static void sta_info_hash_add(struct ieee80211_local *local,
rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
}
-static void sta_unblock(struct work_struct *wk)
+static void sta_deliver_ps_frames(struct work_struct *wk)
{
struct sta_info *sta;
- sta = container_of(wk, struct sta_info, drv_unblock_wk);
+ sta = container_of(wk, struct sta_info, drv_deliver_wk);
if (sta->dead)
return;
- if (!test_sta_flag(sta, WLAN_STA_PS_STA)) {
- local_bh_disable();
+ local_bh_disable();
+ if (!test_sta_flag(sta, WLAN_STA_PS_STA))
ieee80211_sta_ps_deliver_wakeup(sta);
- local_bh_enable();
- } else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) {
- clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
-
- local_bh_disable();
+ else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL))
ieee80211_sta_ps_deliver_poll_response(sta);
- local_bh_enable();
- } else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) {
- clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
-
- local_bh_disable();
+ else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD))
ieee80211_sta_ps_deliver_uapsd(sta);
- local_bh_enable();
- } else
- clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ local_bh_enable();
}
static int sta_prepare_rate_control(struct ieee80211_local *local,
@@ -341,7 +334,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
spin_lock_init(&sta->lock);
spin_lock_init(&sta->ps_lock);
- INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
+ INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames);
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
#ifdef CONFIG_MAC80211_MESH
@@ -358,7 +351,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->sta_state = IEEE80211_STA_NONE;
- do_posix_clock_monotonic_gettime(&uptime);
+ ktime_get_ts(&uptime);
sta->last_connected = uptime.tv_sec;
ewma_init(&sta->avg_signal, 1024, 8);
for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++)
@@ -1102,8 +1095,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
unsigned long flags;
struct ps_data *ps;
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
+ u.ap);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
ps = &sdata->bss->ps;
else if (ieee80211_vif_is_mesh(&sdata->vif))
ps = &sdata->u.mesh.ps;
@@ -1141,8 +1137,15 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
}
ieee80211_add_pending_skbs(local, &pending);
- clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
- clear_sta_flag(sta, WLAN_STA_PS_STA);
+
+ /* now we're no longer in the deliver code */
+ clear_sta_flag(sta, WLAN_STA_PS_DELIVER);
+
+ /* The station might have polled and then woken up before we responded,
+ * so clear these flags now to avoid them sticking around.
+ */
+ clear_sta_flag(sta, WLAN_STA_PSPOLL);
+ clear_sta_flag(sta, WLAN_STA_UAPSD);
spin_unlock(&sta->ps_lock);
atomic_dec(&ps->num_sta_ps);
@@ -1180,7 +1183,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb;
int size = sizeof(*nullfunc);
__le16 fc;
- bool qos = test_sta_flag(sta, WLAN_STA_WME);
+ bool qos = sta->sta.wme;
struct ieee80211_tx_info *info;
struct ieee80211_chanctx_conf *chanctx_conf;
@@ -1543,10 +1546,26 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
trace_api_sta_block_awake(sta->local, pubsta, block);
- if (block)
+ if (block) {
set_sta_flag(sta, WLAN_STA_PS_DRIVER);
- else if (test_sta_flag(sta, WLAN_STA_PS_DRIVER))
- ieee80211_queue_work(hw, &sta->drv_unblock_wk);
+ return;
+ }
+
+ if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER))
+ return;
+
+ if (!test_sta_flag(sta, WLAN_STA_PS_STA)) {
+ set_sta_flag(sta, WLAN_STA_PS_DELIVER);
+ clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ ieee80211_queue_work(hw, &sta->drv_deliver_wk);
+ } else if (test_sta_flag(sta, WLAN_STA_PSPOLL) ||
+ test_sta_flag(sta, WLAN_STA_UAPSD)) {
+ /* must be asleep in this case */
+ clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ ieee80211_queue_work(hw, &sta->drv_deliver_wk);
+ } else {
+ clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ }
}
EXPORT_SYMBOL(ieee80211_sta_block_awake);
@@ -1704,3 +1723,140 @@ u8 sta_info_tx_streams(struct sta_info *sta)
return ((ht_cap->mcs.tx_params & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK)
>> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1;
}
+
+void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct rate_control_ref *ref = NULL;
+ struct timespec uptime;
+ u64 packets = 0;
+ u32 thr = 0;
+ int i, ac;
+
+ if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+ ref = local->rate_ctrl;
+
+ sinfo->generation = sdata->local->sta_generation;
+
+ sinfo->filled = STATION_INFO_INACTIVE_TIME |
+ STATION_INFO_RX_BYTES64 |
+ STATION_INFO_TX_BYTES64 |
+ STATION_INFO_RX_PACKETS |
+ STATION_INFO_TX_PACKETS |
+ STATION_INFO_TX_RETRIES |
+ STATION_INFO_TX_FAILED |
+ STATION_INFO_TX_BITRATE |
+ STATION_INFO_RX_BITRATE |
+ STATION_INFO_RX_DROP_MISC |
+ STATION_INFO_BSS_PARAM |
+ STATION_INFO_CONNECTED_TIME |
+ STATION_INFO_STA_FLAGS |
+ STATION_INFO_BEACON_LOSS_COUNT;
+
+ ktime_get_ts(&uptime);
+ sinfo->connected_time = uptime.tv_sec - sta->last_connected;
+
+ sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
+ sinfo->tx_bytes = 0;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ sinfo->tx_bytes += sta->tx_bytes[ac];
+ packets += sta->tx_packets[ac];
+ }
+ sinfo->tx_packets = packets;
+ sinfo->rx_bytes = sta->rx_bytes;
+ sinfo->rx_packets = sta->rx_packets;
+ sinfo->tx_retries = sta->tx_retry_count;
+ sinfo->tx_failed = sta->tx_retry_failed;
+ sinfo->rx_dropped_misc = sta->rx_dropped;
+ sinfo->beacon_loss_count = sta->beacon_loss_count;
+
+ if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
+ (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
+ sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
+ if (!local->ops->get_rssi ||
+ drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal))
+ sinfo->signal = (s8)sta->last_signal;
+ sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
+ }
+ if (sta->chains) {
+ sinfo->filled |= STATION_INFO_CHAIN_SIGNAL |
+ STATION_INFO_CHAIN_SIGNAL_AVG;
+
+ sinfo->chains = sta->chains;
+ for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
+ sinfo->chain_signal[i] = sta->chain_signal_last[i];
+ sinfo->chain_signal_avg[i] =
+ (s8) -ewma_read(&sta->chain_signal_avg[i]);
+ }
+ }
+
+ sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate);
+ sta_set_rate_info_rx(sta, &sinfo->rxrate);
+
+ if (ieee80211_vif_is_mesh(&sdata->vif)) {
+#ifdef CONFIG_MAC80211_MESH
+ sinfo->filled |= STATION_INFO_LLID |
+ STATION_INFO_PLID |
+ STATION_INFO_PLINK_STATE |
+ STATION_INFO_LOCAL_PM |
+ STATION_INFO_PEER_PM |
+ STATION_INFO_NONPEER_PM;
+
+ sinfo->llid = sta->llid;
+ sinfo->plid = sta->plid;
+ sinfo->plink_state = sta->plink_state;
+ if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) {
+ sinfo->filled |= STATION_INFO_T_OFFSET;
+ sinfo->t_offset = sta->t_offset;
+ }
+ sinfo->local_pm = sta->local_pm;
+ sinfo->peer_pm = sta->peer_pm;
+ sinfo->nonpeer_pm = sta->nonpeer_pm;
+#endif
+ }
+
+ sinfo->bss_param.flags = 0;
+ if (sdata->vif.bss_conf.use_cts_prot)
+ sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT;
+ if (sdata->vif.bss_conf.use_short_preamble)
+ sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE;
+ if (sdata->vif.bss_conf.use_short_slot)
+ sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME;
+ sinfo->bss_param.dtim_period = sdata->vif.bss_conf.dtim_period;
+ sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int;
+
+ sinfo->sta_flags.set = 0;
+ sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) |
+ BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
+ BIT(NL80211_STA_FLAG_WME) |
+ BIT(NL80211_STA_FLAG_MFP) |
+ BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_ASSOCIATED) |
+ BIT(NL80211_STA_FLAG_TDLS_PEER);
+ if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED);
+ if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE))
+ sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE);
+ if (sta->sta.wme)
+ sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME);
+ if (test_sta_flag(sta, WLAN_STA_MFP))
+ sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP);
+ if (test_sta_flag(sta, WLAN_STA_AUTH))
+ sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED);
+ if (test_sta_flag(sta, WLAN_STA_ASSOC))
+ sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+ /* check if the driver has a SW RC implementation */
+ if (ref && ref->ops->get_expected_throughput)
+ thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+ else
+ thr = drv_get_expected_throughput(local, &sta->sta);
+
+ if (thr != 0) {
+ sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT;
+ sinfo->expected_throughput = thr;
+ }
+}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4acc5fc402fa..42f68cb8957e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -1,5 +1,6 @@
/*
* Copyright 2002-2005, Devicescape Software, Inc.
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -31,7 +32,6 @@
* when virtual port control is not in use.
* @WLAN_STA_SHORT_PREAMBLE: Station is capable of receiving short-preamble
* frames.
- * @WLAN_STA_WME: Station is a QoS-STA.
* @WLAN_STA_WDS: Station is one of our WDS peers.
* @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the
* IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next
@@ -47,6 +47,8 @@
* @WLAN_STA_TDLS_PEER: Station is a TDLS peer.
* @WLAN_STA_TDLS_PEER_AUTH: This TDLS peer is authorized to send direct
* packets. This means the link is enabled.
+ * @WLAN_STA_TDLS_INITIATOR: We are the initiator of the TDLS link with this
+ * station.
* @WLAN_STA_UAPSD: Station requested unscheduled SP while driver was
* keeping station in power-save mode, reply when the driver
* unblocks the station.
@@ -58,6 +60,8 @@
* @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid.
* @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period.
* @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP.
+ * @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX
+ * until pending frames are delivered
*/
enum ieee80211_sta_info_flags {
WLAN_STA_AUTH,
@@ -65,7 +69,6 @@ enum ieee80211_sta_info_flags {
WLAN_STA_PS_STA,
WLAN_STA_AUTHORIZED,
WLAN_STA_SHORT_PREAMBLE,
- WLAN_STA_WME,
WLAN_STA_WDS,
WLAN_STA_CLEAR_PS_FILT,
WLAN_STA_MFP,
@@ -74,6 +77,7 @@ enum ieee80211_sta_info_flags {
WLAN_STA_PSPOLL,
WLAN_STA_TDLS_PEER,
WLAN_STA_TDLS_PEER_AUTH,
+ WLAN_STA_TDLS_INITIATOR,
WLAN_STA_UAPSD,
WLAN_STA_SP,
WLAN_STA_4ADDR_EVENT,
@@ -82,6 +86,7 @@ enum ieee80211_sta_info_flags {
WLAN_STA_TOFFSET_KNOWN,
WLAN_STA_MPSP_OWNER,
WLAN_STA_MPSP_RECIPIENT,
+ WLAN_STA_PS_DELIVER,
};
#define ADDBA_RESP_INTERVAL HZ
@@ -149,7 +154,8 @@ struct tid_ampdu_tx {
/**
* struct tid_ampdu_rx - TID aggregation information (Rx).
*
- * @reorder_buf: buffer to reorder incoming aggregated MPDUs
+ * @reorder_buf: buffer to reorder incoming aggregated MPDUs. An MPDU may be an
+ * A-MSDU with individually reported subframes.
* @reorder_time: jiffies when skb was added
* @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
* @reorder_timer: releases expired frames from the reorder buffer.
@@ -162,6 +168,8 @@ struct tid_ampdu_tx {
* @dialog_token: dialog token for aggregation session
* @rcu_head: RCU head used for freeing this struct
* @reorder_lock: serializes access to reorder buffer, see below.
+ * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
+ * and ssn.
*
* This structure's lifetime is managed by RCU, assignments to
* the array holding it must hold the aggregation mutex.
@@ -174,7 +182,7 @@ struct tid_ampdu_tx {
struct tid_ampdu_rx {
struct rcu_head rcu_head;
spinlock_t reorder_lock;
- struct sk_buff **reorder_buf;
+ struct sk_buff_head *reorder_buf;
unsigned long *reorder_time;
struct timer_list session_timer;
struct timer_list reorder_timer;
@@ -185,6 +193,7 @@ struct tid_ampdu_rx {
u16 buf_size;
u16 timeout;
u8 dialog_token;
+ bool auto_seq;
};
/**
@@ -265,7 +274,7 @@ struct ieee80211_tx_latency_stat {
* @last_rx_rate_vht_nss: rx status nss of last data packet
* @lock: used for locking all fields that require locking, see comments
* in the header file.
- * @drv_unblock_wk: used for driver PS unblocking
+ * @drv_deliver_wk: used for delivering frames after driver PS unblocking
* @listen_interval: listen interval of this station, when we're acting as AP
* @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly
* @ps_lock: used for powersave (when mac80211 is the AP) related locking
@@ -278,7 +287,6 @@ struct ieee80211_tx_latency_stat {
* @driver_buffered_tids: bitmap of TIDs the driver has data buffered on
* @rx_packets: Number of MSDUs received from this STA
* @rx_bytes: Number of bytes received from this STA
- * @wep_weak_iv_count: number of weak WEP IVs received from this station
* @last_rx: time (in jiffies) when last frame was received from this STA
* @last_connected: time (in seconds) when a station got connected
* @num_duplicates: number of duplicate frames received from this STA
@@ -303,7 +311,6 @@ struct ieee80211_tx_latency_stat {
* @plid: Peer link ID
* @reason: Cancel reason on PLINK_HOLDING state
* @plink_retries: Retries in establishment
- * @ignore_plink_timer: ignore the peer-link timer (used internally)
* @plink_state: peer link state
* @plink_timeout: timeout of peer link
* @plink_timer: peer link watch timer
@@ -345,7 +352,7 @@ struct sta_info {
void *rate_ctrl_priv;
spinlock_t lock;
- struct work_struct drv_unblock_wk;
+ struct work_struct drv_deliver_wk;
u16 listen_interval;
@@ -367,7 +374,6 @@ struct sta_info {
/* Updated from RX path only, no locking requirements */
unsigned long rx_packets;
u64 rx_bytes;
- unsigned long wep_weak_iv_count;
unsigned long last_rx;
long last_connected;
unsigned long num_duplicates;
@@ -418,7 +424,6 @@ struct sta_info {
u16 plid;
u16 reason;
u8 plink_retries;
- bool ignore_plink_timer;
enum nl80211_plink_state plink_state;
u32 plink_timeout;
struct timer_list plink_timer;
@@ -445,6 +450,9 @@ struct sta_info {
enum ieee80211_smps_mode known_smps_mode;
const struct ieee80211_cipher_scheme *cipher_scheme;
+ /* TDLS timeout data */
+ unsigned long last_tdls_pkt_time;
+
/* keep last! */
struct ieee80211_sta sta;
};
@@ -628,6 +636,8 @@ void sta_set_rate_info_tx(struct sta_info *sta,
struct rate_info *rinfo);
void sta_set_rate_info_rx(struct sta_info *sta,
struct rate_info *rinfo);
+void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo);
+
void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
unsigned long exp_time);
u8 sta_info_tx_streams(struct sta_info *sta);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index ba29ebc86141..89290e33dafe 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -3,6 +3,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2008-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -473,8 +474,6 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local,
struct sta_info *sta,
struct ieee80211_hdr *hdr)
{
- ktime_t skb_dprt;
- struct timespec dprt_time;
u32 msrmnt;
u16 tid;
u8 *qc;
@@ -506,9 +505,8 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local,
tx_lat = &sta->tx_lat[tid];
- ktime_get_ts(&dprt_time); /* time stamp completion time */
- skb_dprt = ktime_set(dprt_time.tv_sec, dprt_time.tv_nsec);
- msrmnt = ktime_to_ms(ktime_sub(skb_dprt, skb_arv));
+ /* Calculate the latency */
+ msrmnt = ktime_to_ms(ktime_sub(ktime_get(), skb_arv));
if (tx_lat->max < msrmnt) /* update stats */
tx_lat->max = msrmnt;
@@ -540,6 +538,8 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local,
* - current throughput (higher value for higher tpt)?
*/
#define STA_LOST_PKT_THRESHOLD 50
+#define STA_LOST_TDLS_PKT_THRESHOLD 10
+#define STA_LOST_TDLS_PKT_TIME (10*HZ) /* 10secs since last ACK */
static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb)
{
@@ -550,7 +550,20 @@ static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb)
!(info->flags & IEEE80211_TX_STAT_AMPDU))
return;
- if (++sta->lost_packets < STA_LOST_PKT_THRESHOLD)
+ sta->lost_packets++;
+ if (!sta->sta.tdls && sta->lost_packets < STA_LOST_PKT_THRESHOLD)
+ return;
+
+ /*
+ * If we're in TDLS mode, make sure that all STA_LOST_TDLS_PKT_THRESHOLD
+ * of the last packets were lost, and that no ACK was received in the
+ * last STA_LOST_TDLS_PKT_TIME ms, before triggering the CQM packet-loss
+ * mechanism.
+ */
+ if (sta->sta.tdls &&
+ (sta->lost_packets < STA_LOST_TDLS_PKT_THRESHOLD ||
+ time_before(jiffies,
+ sta->last_tdls_pkt_time + STA_LOST_TDLS_PKT_TIME)))
return;
cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
@@ -697,6 +710,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (info->flags & IEEE80211_TX_STAT_ACK) {
if (sta->lost_packets)
sta->lost_packets = 0;
+
+ /* Track when last TDLS packet was ACKed */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
+ sta->last_tdls_pkt_time = jiffies;
} else {
ieee80211_lost_packet(sta, skb);
}
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 652813b2d3df..4ea25dec0698 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -3,12 +3,37 @@
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2014, Intel Corporation
+ * Copyright 2014 Intel Mobile Communications GmbH
*
* This file is GPLv2 as found in COPYING.
*/
#include <linux/ieee80211.h>
+#include <linux/log2.h>
+#include <net/cfg80211.h>
#include "ieee80211_i.h"
+#include "driver-ops.h"
+
+/* give usermode some time for retries in setting up the TDLS session */
+#define TDLS_PEER_SETUP_TIMEOUT (15 * HZ)
+
+void ieee80211_tdls_peer_del_work(struct work_struct *wk)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_local *local;
+
+ sdata = container_of(wk, struct ieee80211_sub_if_data,
+ u.mgd.tdls_peer_del_work.work);
+ local = sdata->local;
+
+ mutex_lock(&local->mtx);
+ if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer)) {
+ tdls_dbg(sdata, "TDLS del peer %pM\n", sdata->u.mgd.tdls_peer);
+ sta_info_destroy_addr(sdata, sdata->u.mgd.tdls_peer);
+ eth_zero_addr(sdata->u.mgd.tdls_peer);
+ }
+ mutex_unlock(&local->mtx);
+}
static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
{
@@ -23,11 +48,16 @@ static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
*pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
}
-static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
+static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata,
+ u16 status_code)
{
struct ieee80211_local *local = sdata->local;
u16 capab;
+ /* The capability will be 0 when sending a failure code */
+ if (status_code != 0)
+ return 0;
+
capab = 0;
if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
return capab;
@@ -40,19 +70,331 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
return capab;
}
-static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
- const u8 *peer, const u8 *bssid)
+static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, const u8 *peer,
+ bool initiator)
{
struct ieee80211_tdls_lnkie *lnkid;
+ const u8 *init_addr, *rsp_addr;
+
+ if (initiator) {
+ init_addr = sdata->vif.addr;
+ rsp_addr = peer;
+ } else {
+ init_addr = peer;
+ rsp_addr = sdata->vif.addr;
+ }
lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
lnkid->ie_type = WLAN_EID_LINK_ID;
lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
- memcpy(lnkid->bssid, bssid, ETH_ALEN);
- memcpy(lnkid->init_sta, src_addr, ETH_ALEN);
- memcpy(lnkid->resp_sta, peer, ETH_ALEN);
+ memcpy(lnkid->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(lnkid->init_sta, init_addr, ETH_ALEN);
+ memcpy(lnkid->resp_sta, rsp_addr, ETH_ALEN);
+}
+
+/* translate numbering in the WMM parameter IE to the mac80211 notation */
+static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac)
+{
+ switch (ac) {
+ default:
+ WARN_ON_ONCE(1);
+ case 0:
+ return IEEE80211_AC_BE;
+ case 1:
+ return IEEE80211_AC_BK;
+ case 2:
+ return IEEE80211_AC_VI;
+ case 3:
+ return IEEE80211_AC_VO;
+ }
+}
+
+static u8 ieee80211_wmm_aci_aifsn(int aifsn, bool acm, int aci)
+{
+ u8 ret;
+
+ ret = aifsn & 0x0f;
+ if (acm)
+ ret |= 0x10;
+ ret |= (aci << 5) & 0x60;
+ return ret;
+}
+
+static u8 ieee80211_wmm_ecw(u16 cw_min, u16 cw_max)
+{
+ return ((ilog2(cw_min + 1) << 0x0) & 0x0f) |
+ ((ilog2(cw_max + 1) << 0x4) & 0xf0);
+}
+
+static void ieee80211_tdls_add_wmm_param_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_wmm_param_ie *wmm;
+ struct ieee80211_tx_queue_params *txq;
+ int i;
+
+ wmm = (void *)skb_put(skb, sizeof(*wmm));
+ memset(wmm, 0, sizeof(*wmm));
+
+ wmm->element_id = WLAN_EID_VENDOR_SPECIFIC;
+ wmm->len = sizeof(*wmm) - 2;
+
+ wmm->oui[0] = 0x00; /* Microsoft OUI 00:50:F2 */
+ wmm->oui[1] = 0x50;
+ wmm->oui[2] = 0xf2;
+ wmm->oui_type = 2; /* WME */
+ wmm->oui_subtype = 1; /* WME param */
+ wmm->version = 1; /* WME ver */
+ wmm->qos_info = 0; /* U-APSD not in use */
+
+ /*
+ * Use the EDCA parameters defined for the BSS, or default if the AP
+ * doesn't support it, as mandated by 802.11-2012 section 10.22.4
+ */
+ for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ txq = &sdata->tx_conf[ieee80211_ac_from_wmm(i)];
+ wmm->ac[i].aci_aifsn = ieee80211_wmm_aci_aifsn(txq->aifs,
+ txq->acm, i);
+ wmm->ac[i].cw = ieee80211_wmm_ecw(txq->cw_min, txq->cw_max);
+ wmm->ac[i].txop_limit = cpu_to_le16(txq->txop);
+ }
+}
+
+static void
+ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, const u8 *peer,
+ u8 action_code, bool initiator,
+ const u8 *extra_ies, size_t extra_ies_len)
+{
+ enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_sta_ht_cap ht_cap;
+ struct sta_info *sta = NULL;
+ size_t offset = 0, noffset;
+ u8 *pos;
+
+ rcu_read_lock();
+
+ /* we should have the peer STA if we're already responding */
+ if (action_code == WLAN_TDLS_SETUP_RESPONSE) {
+ sta = sta_info_get(sdata, peer);
+ if (WARN_ON_ONCE(!sta)) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+
+ ieee80211_add_srates_ie(sdata, skb, false, band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+
+ /* add any custom IEs that go before Extended Capabilities */
+ if (extra_ies_len) {
+ static const u8 before_ext_cap[] = {
+ WLAN_EID_SUPP_RATES,
+ WLAN_EID_COUNTRY,
+ WLAN_EID_EXT_SUPP_RATES,
+ WLAN_EID_SUPPORTED_CHANNELS,
+ WLAN_EID_RSN,
+ };
+ noffset = ieee80211_ie_split(extra_ies, extra_ies_len,
+ before_ext_cap,
+ ARRAY_SIZE(before_ext_cap),
+ offset);
+ pos = skb_put(skb, noffset - offset);
+ memcpy(pos, extra_ies + offset, noffset - offset);
+ offset = noffset;
+ }
+
+ ieee80211_tdls_add_ext_capab(skb);
+
+ /* add the QoS element if we support it */
+ if (local->hw.queues >= IEEE80211_NUM_ACS &&
+ action_code != WLAN_PUB_ACTION_TDLS_DISCOVER_RES)
+ ieee80211_add_wmm_info_ie(skb_put(skb, 9), 0); /* no U-APSD */
+
+ /* add any custom IEs that go before HT capabilities */
+ if (extra_ies_len) {
+ static const u8 before_ht_cap[] = {
+ WLAN_EID_SUPP_RATES,
+ WLAN_EID_COUNTRY,
+ WLAN_EID_EXT_SUPP_RATES,
+ WLAN_EID_SUPPORTED_CHANNELS,
+ WLAN_EID_RSN,
+ WLAN_EID_EXT_CAPABILITY,
+ WLAN_EID_QOS_CAPA,
+ WLAN_EID_FAST_BSS_TRANSITION,
+ WLAN_EID_TIMEOUT_INTERVAL,
+ WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+ };
+ noffset = ieee80211_ie_split(extra_ies, extra_ies_len,
+ before_ht_cap,
+ ARRAY_SIZE(before_ht_cap),
+ offset);
+ pos = skb_put(skb, noffset - offset);
+ memcpy(pos, extra_ies + offset, noffset - offset);
+ offset = noffset;
+ }
+
+ /*
+ * with TDLS we can switch channels, and HT-caps are not necessarily
+ * the same on all bands. The specification limits the setup to a
+ * single HT-cap, so use the current band for now.
+ */
+ sband = local->hw.wiphy->bands[band];
+ memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap));
+ if ((action_code == WLAN_TDLS_SETUP_REQUEST ||
+ action_code == WLAN_TDLS_SETUP_RESPONSE) &&
+ ht_cap.ht_supported && (!sta || sta->sta.ht_cap.ht_supported)) {
+ if (action_code == WLAN_TDLS_SETUP_REQUEST) {
+ ieee80211_apply_htcap_overrides(sdata, &ht_cap);
+
+ /* disable SMPS in TDLS initiator */
+ ht_cap.cap |= (WLAN_HT_CAP_SM_PS_DISABLED
+ << IEEE80211_HT_CAP_SM_PS_SHIFT);
+ } else {
+ /* disable SMPS in TDLS responder */
+ sta->sta.ht_cap.cap |=
+ (WLAN_HT_CAP_SM_PS_DISABLED
+ << IEEE80211_HT_CAP_SM_PS_SHIFT);
+
+ /* the peer caps are already intersected with our own */
+ memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap));
+ }
+
+ pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
+ ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
+ }
+
+ rcu_read_unlock();
+
+ /* add any remaining IEs */
+ if (extra_ies_len) {
+ noffset = extra_ies_len;
+ pos = skb_put(skb, noffset - offset);
+ memcpy(pos, extra_ies + offset, noffset - offset);
+ }
+
+ ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
+}
+
+static void
+ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, const u8 *peer,
+ bool initiator, const u8 *extra_ies,
+ size_t extra_ies_len)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ size_t offset = 0, noffset;
+ struct sta_info *sta, *ap_sta;
+ u8 *pos;
+
+ rcu_read_lock();
+
+ sta = sta_info_get(sdata, peer);
+ ap_sta = sta_info_get(sdata, ifmgd->bssid);
+ if (WARN_ON_ONCE(!sta || !ap_sta)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* add any custom IEs that go before the QoS IE */
+ if (extra_ies_len) {
+ static const u8 before_qos[] = {
+ WLAN_EID_RSN,
+ };
+ noffset = ieee80211_ie_split(extra_ies, extra_ies_len,
+ before_qos,
+ ARRAY_SIZE(before_qos),
+ offset);
+ pos = skb_put(skb, noffset - offset);
+ memcpy(pos, extra_ies + offset, noffset - offset);
+ offset = noffset;
+ }
+
+ /* add the QoS param IE if both the peer and we support it */
+ if (local->hw.queues >= IEEE80211_NUM_ACS && sta->sta.wme)
+ ieee80211_tdls_add_wmm_param_ie(sdata, skb);
+
+ /* add any custom IEs that go before HT operation */
+ if (extra_ies_len) {
+ static const u8 before_ht_op[] = {
+ WLAN_EID_RSN,
+ WLAN_EID_QOS_CAPA,
+ WLAN_EID_FAST_BSS_TRANSITION,
+ WLAN_EID_TIMEOUT_INTERVAL,
+ };
+ noffset = ieee80211_ie_split(extra_ies, extra_ies_len,
+ before_ht_op,
+ ARRAY_SIZE(before_ht_op),
+ offset);
+ pos = skb_put(skb, noffset - offset);
+ memcpy(pos, extra_ies + offset, noffset - offset);
+ offset = noffset;
+ }
+
+ /* if HT support is only added in TDLS, we need an HT-operation IE */
+ if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
+ struct ieee80211_chanctx_conf *chanctx_conf =
+ rcu_dereference(sdata->vif.chanctx_conf);
+ if (!WARN_ON(!chanctx_conf)) {
+ pos = skb_put(skb, 2 +
+ sizeof(struct ieee80211_ht_operation));
+ /* send an empty HT operation IE */
+ ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
+ &chanctx_conf->def, 0);
+ }
+ }
+
+ rcu_read_unlock();
+
+ /* add any remaining IEs */
+ if (extra_ies_len) {
+ noffset = extra_ies_len;
+ pos = skb_put(skb, noffset - offset);
+ memcpy(pos, extra_ies + offset, noffset - offset);
+ }
+
+ ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
+}
+
+static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, const u8 *peer,
+ u8 action_code, u16 status_code,
+ bool initiator, const u8 *extra_ies,
+ size_t extra_ies_len)
+{
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_RESPONSE:
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ if (status_code == 0)
+ ieee80211_tdls_add_setup_start_ies(sdata, skb, peer,
+ action_code,
+ initiator,
+ extra_ies,
+ extra_ies_len);
+ break;
+ case WLAN_TDLS_SETUP_CONFIRM:
+ if (status_code == 0)
+ ieee80211_tdls_add_setup_cfm_ies(sdata, skb, peer,
+ initiator, extra_ies,
+ extra_ies_len);
+ break;
+ case WLAN_TDLS_TEARDOWN:
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ if (extra_ies_len)
+ memcpy(skb_put(skb, extra_ies_len), extra_ies,
+ extra_ies_len);
+ if (status_code == 0 || action_code == WLAN_TDLS_TEARDOWN)
+ ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
+ break;
+ }
+
}
static int
@@ -61,7 +403,6 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
u16 status_code, struct sk_buff *skb)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_tdls_data *tf;
tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
@@ -71,6 +412,9 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
tf->ether_type = cpu_to_be16(ETH_P_TDLS);
tf->payload_type = WLAN_TDLS_SNAP_RFTYPE;
+ /* network header is after the ethernet header */
+ skb_set_network_header(skb, ETH_HLEN);
+
switch (action_code) {
case WLAN_TDLS_SETUP_REQUEST:
tf->category = WLAN_CATEGORY_TDLS;
@@ -79,11 +423,8 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
skb_put(skb, sizeof(tf->u.setup_req));
tf->u.setup_req.dialog_token = dialog_token;
tf->u.setup_req.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata,
+ status_code));
break;
case WLAN_TDLS_SETUP_RESPONSE:
tf->category = WLAN_CATEGORY_TDLS;
@@ -93,11 +434,8 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
tf->u.setup_resp.status_code = cpu_to_le16(status_code);
tf->u.setup_resp.dialog_token = dialog_token;
tf->u.setup_resp.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata,
+ status_code));
break;
case WLAN_TDLS_SETUP_CONFIRM:
tf->category = WLAN_CATEGORY_TDLS;
@@ -134,7 +472,6 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
u16 status_code, struct sk_buff *skb)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_mgmt *mgmt;
mgmt = (void *)skb_put(skb, 24);
@@ -155,11 +492,8 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
mgmt->u.action.u.tdls_discover_resp.dialog_token =
dialog_token;
mgmt->u.action.u.tdls_discover_resp.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata,
+ status_code));
break;
default:
return -EINVAL;
@@ -168,33 +502,28 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
-int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
- const u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, u32 peer_capability,
- const u8 *extra_ies, size_t extra_ies_len)
+static int
+ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code,
+ u8 dialog_token, u16 status_code,
+ u32 peer_capability, bool initiator,
+ const u8 *extra_ies, size_t extra_ies_len)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb = NULL;
bool send_direct;
+ struct sta_info *sta;
int ret;
- if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
-
- /* make sure we are in managed mode, and associated */
- if (sdata->vif.type != NL80211_IFTYPE_STATION ||
- !sdata->u.mgd.associated)
- return -EINVAL;
-
- tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n",
- action_code, peer);
-
skb = dev_alloc_skb(local->hw.extra_tx_headroom +
max(sizeof(struct ieee80211_mgmt),
sizeof(struct ieee80211_tdls_data)) +
50 + /* supported rates */
7 + /* ext capab */
+ 26 + /* max(WMM-info, WMM-param) */
+ 2 + max(sizeof(struct ieee80211_ht_cap),
+ sizeof(struct ieee80211_ht_operation)) +
extra_ies_len +
sizeof(struct ieee80211_tdls_lnkie));
if (!skb)
@@ -227,30 +556,48 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
if (ret < 0)
goto fail;
- if (extra_ies_len)
- memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
+ rcu_read_lock();
+ sta = sta_info_get(sdata, peer);
- /* the TDLS link IE is always added last */
+ /* infer the initiator if we can, to support old userspace */
switch (action_code) {
case WLAN_TDLS_SETUP_REQUEST:
+ if (sta)
+ set_sta_flag(sta, WLAN_STA_TDLS_INITIATOR);
+ /* fall-through */
case WLAN_TDLS_SETUP_CONFIRM:
- case WLAN_TDLS_TEARDOWN:
case WLAN_TDLS_DISCOVERY_REQUEST:
- /* we are the initiator */
- ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer,
- sdata->u.mgd.bssid);
+ initiator = true;
break;
case WLAN_TDLS_SETUP_RESPONSE:
+ /*
+ * In some testing scenarios, we send a request and response.
+ * Make the last packet sent take effect for the initiator
+ * value.
+ */
+ if (sta)
+ clear_sta_flag(sta, WLAN_STA_TDLS_INITIATOR);
+ /* fall-through */
case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
- /* we are the responder */
- ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr,
- sdata->u.mgd.bssid);
+ initiator = false;
+ break;
+ case WLAN_TDLS_TEARDOWN:
+ /* any value is ok */
break;
default:
ret = -ENOTSUPP;
- goto fail;
+ break;
}
+ if (sta && test_sta_flag(sta, WLAN_STA_TDLS_INITIATOR))
+ initiator = true;
+
+ rcu_read_unlock();
+ if (ret < 0)
+ goto fail;
+
+ ieee80211_tdls_add_ies(sdata, skb, peer, action_code, status_code,
+ initiator, extra_ies, extra_ies_len);
if (send_direct) {
ieee80211_tx_skb(sdata, skb);
return 0;
@@ -284,11 +631,175 @@ fail:
return ret;
}
+static int
+ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, u32 peer_capability, bool initiator,
+ const u8 *extra_ies, size_t extra_ies_len)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ int ret;
+
+ mutex_lock(&local->mtx);
+
+ /* we don't support concurrent TDLS peer setups */
+ if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer) &&
+ !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) {
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ /*
+ * make sure we have a STA representing the peer so we drop or buffer
+ * non-TDLS-setup frames to the peer. We can't send other packets
+ * during setup through the AP path.
+ * Allow error packets to be sent - sometimes we don't even add a STA
+ * before failing the setup.
+ */
+ if (status_code == 0) {
+ rcu_read_lock();
+ if (!sta_info_get(sdata, peer)) {
+ rcu_read_unlock();
+ ret = -ENOLINK;
+ goto exit;
+ }
+ rcu_read_unlock();
+ }
+
+ ieee80211_flush_queues(local, sdata);
+
+ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code,
+ dialog_token, status_code,
+ peer_capability, initiator,
+ extra_ies, extra_ies_len);
+ if (ret < 0)
+ goto exit;
+
+ memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN);
+ ieee80211_queue_delayed_work(&sdata->local->hw,
+ &sdata->u.mgd.tdls_peer_del_work,
+ TDLS_PEER_SETUP_TIMEOUT);
+
+exit:
+ mutex_unlock(&local->mtx);
+ return ret;
+}
+
+static int
+ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, u32 peer_capability,
+ bool initiator, const u8 *extra_ies,
+ size_t extra_ies_len)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+ int ret;
+
+ /*
+ * No packets can be transmitted to the peer via the AP during setup -
+ * the STA is set as a TDLS peer, but is not authorized.
+ * During teardown, we prevent direct transmissions by stopping the
+ * queues and flushing all direct packets.
+ */
+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN);
+ ieee80211_flush_queues(local, sdata);
+
+ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code,
+ dialog_token, status_code,
+ peer_capability, initiator,
+ extra_ies, extra_ies_len);
+ if (ret < 0)
+ sdata_err(sdata, "Failed sending TDLS teardown packet %d\n",
+ ret);
+
+ /*
+ * Remove the STA AUTH flag to force further traffic through the AP. If
+ * the STA was unreachable, it was already removed.
+ */
+ rcu_read_lock();
+ sta = sta_info_get(sdata, peer);
+ if (sta)
+ clear_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
+ rcu_read_unlock();
+
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN);
+
+ return 0;
+}
+
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, u32 peer_capability,
+ bool initiator, const u8 *extra_ies,
+ size_t extra_ies_len)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ int ret;
+
+ if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
+ return -ENOTSUPP;
+
+ /* make sure we are in managed mode, and associated */
+ if (sdata->vif.type != NL80211_IFTYPE_STATION ||
+ !sdata->u.mgd.associated)
+ return -EINVAL;
+
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_RESPONSE:
+ ret = ieee80211_tdls_mgmt_setup(wiphy, dev, peer, action_code,
+ dialog_token, status_code,
+ peer_capability, initiator,
+ extra_ies, extra_ies_len);
+ break;
+ case WLAN_TDLS_TEARDOWN:
+ ret = ieee80211_tdls_mgmt_teardown(wiphy, dev, peer,
+ action_code, dialog_token,
+ status_code,
+ peer_capability, initiator,
+ extra_ies, extra_ies_len);
+ break;
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ /*
+ * Protect the discovery so we can hear the TDLS discovery
+ * response frame. It is transmitted directly and not buffered
+ * by the AP.
+ */
+ drv_mgd_protect_tdls_discover(sdata->local, sdata);
+ /* fall-through */
+ case WLAN_TDLS_SETUP_CONFIRM:
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ /* no special handling */
+ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer,
+ action_code,
+ dialog_token,
+ status_code,
+ peer_capability,
+ initiator, extra_ies,
+ extra_ies_len);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ tdls_dbg(sdata, "TDLS mgmt action %d peer %pM status %d\n",
+ action_code, peer, ret);
+ return ret;
+}
+
int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, enum nl80211_tdls_operation oper)
{
struct sta_info *sta;
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ int ret;
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
return -ENOTSUPP;
@@ -296,6 +807,18 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
if (sdata->vif.type != NL80211_IFTYPE_STATION)
return -EINVAL;
+ switch (oper) {
+ case NL80211_TDLS_ENABLE_LINK:
+ case NL80211_TDLS_DISABLE_LINK:
+ break;
+ case NL80211_TDLS_TEARDOWN:
+ case NL80211_TDLS_SETUP:
+ case NL80211_TDLS_DISCOVERY_REQ:
+ /* We don't support in-driver setup/teardown/discovery */
+ return -ENOTSUPP;
+ }
+
+ mutex_lock(&local->mtx);
tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
switch (oper) {
@@ -304,22 +827,60 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
sta = sta_info_get(sdata, peer);
if (!sta) {
rcu_read_unlock();
- return -ENOLINK;
+ ret = -ENOLINK;
+ break;
}
set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
rcu_read_unlock();
+
+ WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) ||
+ !ether_addr_equal(sdata->u.mgd.tdls_peer, peer));
+ ret = 0;
break;
case NL80211_TDLS_DISABLE_LINK:
- return sta_info_destroy_addr(sdata, peer);
- case NL80211_TDLS_TEARDOWN:
- case NL80211_TDLS_SETUP:
- case NL80211_TDLS_DISCOVERY_REQ:
- /* We don't support in-driver setup/teardown/discovery */
- return -ENOTSUPP;
+ /*
+ * The teardown message in ieee80211_tdls_mgmt_teardown() was
+ * created while the queues were stopped, so it might still be
+ * pending. Before flushing the queues we need to be sure the
+ * message is handled by the tasklet handling pending messages,
+ * otherwise we might start destroying the station before
+ * sending the teardown packet.
+ * Note that this only forces the tasklet to flush pendings -
+ * not to stop the tasklet from rescheduling itself.
+ */
+ tasklet_kill(&local->tx_pending_tasklet);
+ /* flush a potentially queued teardown packet */
+ ieee80211_flush_queues(local, sdata);
+
+ ret = sta_info_destroy_addr(sdata, peer);
+ break;
default:
- return -ENOTSUPP;
+ ret = -ENOTSUPP;
+ break;
}
- return 0;
+ if (ret == 0 && ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) {
+ cancel_delayed_work(&sdata->u.mgd.tdls_peer_del_work);
+ eth_zero_addr(sdata->u.mgd.tdls_peer);
+ }
+
+ mutex_unlock(&local->mtx);
+ return ret;
+}
+
+void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer,
+ enum nl80211_tdls_operation oper,
+ u16 reason_code, gfp_t gfp)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ if (vif->type != NL80211_IFTYPE_STATION || !vif->bss_conf.assoc) {
+ sdata_err(sdata, "Discarding TDLS oper %d - not STA or disconnected\n",
+ oper);
+ return;
+ }
+
+ cfg80211_tdls_oper_request(sdata->dev, peer, oper, reason_code, gfp);
}
+EXPORT_SYMBOL(ieee80211_tdls_oper_request);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index cfe1a0688b5c..38fae7ebe984 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -672,13 +672,13 @@ DEFINE_EVENT(local_u32_evt, drv_set_rts_threshold,
);
TRACE_EVENT(drv_set_coverage_class,
- TP_PROTO(struct ieee80211_local *local, u8 value),
+ TP_PROTO(struct ieee80211_local *local, s16 value),
TP_ARGS(local, value),
TP_STRUCT__entry(
LOCAL_ENTRY
- __field(u8, value)
+ __field(s16, value)
),
TP_fast_assign(
@@ -1330,6 +1330,13 @@ DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx,
TP_ARGS(local, sdata)
);
+DEFINE_EVENT(local_sdata_evt, drv_mgd_protect_tdls_discover,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+
+ TP_ARGS(local, sdata)
+);
+
DECLARE_EVENT_CLASS(local_chanctx,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1a252c606ad0..900632a250ec 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3,6 +3,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -250,7 +251,8 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
ieee80211_stop_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_PS);
+ IEEE80211_QUEUE_STOP_REASON_PS,
+ false);
ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
ieee80211_queue_work(&local->hw,
&local->dynamic_ps_disable_work);
@@ -473,7 +475,8 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
return TX_CONTINUE;
if (unlikely((test_sta_flag(sta, WLAN_STA_PS_STA) ||
- test_sta_flag(sta, WLAN_STA_PS_DRIVER)) &&
+ test_sta_flag(sta, WLAN_STA_PS_DRIVER) ||
+ test_sta_flag(sta, WLAN_STA_PS_DELIVER)) &&
!(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) {
int ac = skb_get_queue_mapping(tx->skb);
@@ -496,7 +499,8 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
* ahead and Tx the packet.
*/
if (!test_sta_flag(sta, WLAN_STA_PS_STA) &&
- !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) {
+ !test_sta_flag(sta, WLAN_STA_PS_DRIVER) &&
+ !test_sta_flag(sta, WLAN_STA_PS_DELIVER)) {
spin_unlock(&sta->ps_lock);
return TX_CONTINUE;
}
@@ -1475,7 +1479,10 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
tail_need = max_t(int, tail_need, 0);
}
- if (skb_cloned(skb))
+ if (skb_cloned(skb) &&
+ (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) ||
+ !skb_clone_writable(skb, ETH_HLEN) ||
+ sdata->crypto_tx_tailroom_needed_cnt))
I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
else if (head_need || tail_need)
I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -1618,12 +1625,12 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
{
struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_channel *chan;
struct ieee80211_radiotap_header *prthdr =
(struct ieee80211_radiotap_header *)skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr;
struct ieee80211_sub_if_data *tmp_sdata, *sdata;
+ struct cfg80211_chan_def *chandef;
u16 len_rthdr;
int hdrlen;
@@ -1721,9 +1728,9 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
}
if (chanctx_conf)
- chan = chanctx_conf->def.chan;
+ chandef = &chanctx_conf->def;
else if (!local->use_chanctx)
- chan = local->_oper_chandef.chan;
+ chandef = &local->_oper_chandef;
else
goto fail_rcu;
@@ -1743,10 +1750,11 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
* radar detection by itself. We can do that later by adding a
* monitor flag interfaces used for AP support.
*/
- if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)))
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, chandef,
+ sdata->vif.type))
goto fail_rcu;
- ieee80211_xmit(sdata, skb, chan->band);
+ ieee80211_xmit(sdata, skb, chandef->chan->band);
rcu_read_unlock();
return NETDEV_TX_OK;
@@ -1767,15 +1775,12 @@ fail:
static void ieee80211_tx_latency_start_msrmnt(struct ieee80211_local *local,
struct sk_buff *skb)
{
- struct timespec skb_arv;
struct ieee80211_tx_latency_bin_ranges *tx_latency;
tx_latency = rcu_dereference(local->tx_latency);
if (!tx_latency)
return;
-
- ktime_get_ts(&skb_arv);
- skb->tstamp = ktime_set(skb_arv.tv_sec, skb_arv.tv_nsec);
+ skb->tstamp = ktime_get();
}
/**
@@ -1784,9 +1789,8 @@ static void ieee80211_tx_latency_start_msrmnt(struct ieee80211_local *local,
* @skb: packet to be sent
* @dev: incoming interface
*
- * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will
- * not be freed, and caller is responsible for either retrying later or freeing
- * skb).
+ * Returns: NETDEV_TX_OK both on success and on failure. On failure skb will
+ * be freed.
*
* This function takes in an Ethernet header and encapsulates it with suitable
* IEEE 802.11 header based on which interface the packet is coming in. The
@@ -1810,7 +1814,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
int nh_pos, h_pos;
struct sta_info *sta = NULL;
bool wme_sta = false, authorized = false, tdls_auth = false;
- bool tdls_direct = false;
+ bool tdls_peer = false, tdls_setup_frame = false;
bool multicast;
u32 info_flags = 0;
u16 info_id = 0;
@@ -1843,7 +1847,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
hdrlen = 30;
authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED);
- wme_sta = test_sta_flag(sta, WLAN_STA_WME);
+ wme_sta = sta->sta.wme;
}
ap_sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
u.ap);
@@ -1952,34 +1956,35 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
#endif
case NL80211_IFTYPE_STATION:
if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) {
- bool tdls_peer = false;
-
sta = sta_info_get(sdata, skb->data);
if (sta) {
authorized = test_sta_flag(sta,
WLAN_STA_AUTHORIZED);
- wme_sta = test_sta_flag(sta, WLAN_STA_WME);
+ wme_sta = sta->sta.wme;
tdls_peer = test_sta_flag(sta,
- WLAN_STA_TDLS_PEER);
+ WLAN_STA_TDLS_PEER);
tdls_auth = test_sta_flag(sta,
WLAN_STA_TDLS_PEER_AUTH);
}
- /*
- * If the TDLS link is enabled, send everything
- * directly. Otherwise, allow TDLS setup frames
- * to be transmitted indirectly.
- */
- tdls_direct = tdls_peer && (tdls_auth ||
- !(ethertype == ETH_P_TDLS && skb->len > 14 &&
- skb->data[14] == WLAN_TDLS_SNAP_RFTYPE));
+ if (tdls_peer)
+ tdls_setup_frame =
+ ethertype == ETH_P_TDLS &&
+ skb->len > 14 &&
+ skb->data[14] == WLAN_TDLS_SNAP_RFTYPE;
}
- if (tdls_direct) {
- /* link during setup - throw out frames to peer */
- if (!tdls_auth)
- goto fail_rcu;
+ /*
+ * TDLS link during setup - throw out frames to peer. We allow
+ * TDLS-setup frames to unauthorized peers for the special case
+ * of a link teardown after a TDLS sta is removed due to being
+ * unreachable.
+ */
+ if (tdls_peer && !tdls_auth && !tdls_setup_frame)
+ goto fail_rcu;
+ /* send direct packets to authorized TDLS peers */
+ if (tdls_peer && tdls_auth) {
/* DA SA BSSID */
memcpy(hdr.addr1, skb->data, ETH_ALEN);
memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
@@ -2033,7 +2038,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
sta = sta_info_get(sdata, hdr.addr1);
if (sta) {
authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED);
- wme_sta = test_sta_flag(sta, WLAN_STA_WME);
+ wme_sta = sta->sta.wme;
}
}
@@ -2067,30 +2072,23 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
if (unlikely(!multicast && skb->sk &&
skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) {
- struct sk_buff *orig_skb = skb;
+ struct sk_buff *ack_skb = skb_clone_sk(skb);
- skb = skb_clone(skb, GFP_ATOMIC);
- if (skb) {
+ if (ack_skb) {
unsigned long flags;
int id;
spin_lock_irqsave(&local->ack_status_lock, flags);
- id = idr_alloc(&local->ack_status_frames, orig_skb,
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
1, 0x10000, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, flags);
if (id >= 0) {
info_id = id;
info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- } else if (skb_shared(skb)) {
- kfree_skb(orig_skb);
} else {
- kfree_skb(skb);
- skb = orig_skb;
+ kfree_skb(ack_skb);
}
- } else {
- /* couldn't clone -- lose tx status ... */
- skb = orig_skb;
}
}
@@ -2423,7 +2421,7 @@ static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
u8 *beacon_data;
size_t beacon_data_len;
int i;
- u8 count = sdata->csa_current_counter;
+ u8 count = beacon->csa_current_counter;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
@@ -2442,46 +2440,53 @@ static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
return;
}
+ rcu_read_lock();
for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) {
- u16 counter_offset_beacon =
- sdata->csa_counter_offset_beacon[i];
- u16 counter_offset_presp = sdata->csa_counter_offset_presp[i];
-
- if (counter_offset_beacon) {
- if (WARN_ON(counter_offset_beacon >= beacon_data_len))
- return;
+ resp = rcu_dereference(sdata->u.ap.probe_resp);
- beacon_data[counter_offset_beacon] = count;
- }
-
- if (sdata->vif.type == NL80211_IFTYPE_AP &&
- counter_offset_presp) {
- rcu_read_lock();
- resp = rcu_dereference(sdata->u.ap.probe_resp);
-
- /* If nl80211 accepted the offset, this should
- * not happen.
- */
- if (WARN_ON(!resp)) {
+ if (beacon->csa_counter_offsets[i]) {
+ if (WARN_ON_ONCE(beacon->csa_counter_offsets[i] >=
+ beacon_data_len)) {
rcu_read_unlock();
return;
}
- resp->data[counter_offset_presp] = count;
- rcu_read_unlock();
+
+ beacon_data[beacon->csa_counter_offsets[i]] = count;
}
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP && resp)
+ resp->data[resp->csa_counter_offsets[i]] = count;
}
+ rcu_read_unlock();
}
u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct beacon_data *beacon = NULL;
+ u8 count = 0;
- sdata->csa_current_counter--;
+ rcu_read_lock();
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ beacon = rcu_dereference(sdata->u.ap.beacon);
+ else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ beacon = rcu_dereference(sdata->u.ibss.presp);
+ else if (ieee80211_vif_is_mesh(&sdata->vif))
+ beacon = rcu_dereference(sdata->u.mesh.beacon);
+
+ if (!beacon)
+ goto unlock;
+
+ beacon->csa_current_counter--;
/* the counter should never reach 0 */
- WARN_ON(!sdata->csa_current_counter);
+ WARN_ON_ONCE(!beacon->csa_current_counter);
+ count = beacon->csa_current_counter;
- return sdata->csa_current_counter;
+unlock:
+ rcu_read_unlock();
+ return count;
}
EXPORT_SYMBOL(ieee80211_csa_update_counter);
@@ -2491,7 +2496,6 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
struct beacon_data *beacon = NULL;
u8 *beacon_data;
size_t beacon_data_len;
- int counter_beacon = sdata->csa_counter_offset_beacon[0];
int ret = false;
if (!ieee80211_sdata_running(sdata))
@@ -2529,10 +2533,13 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
goto out;
}
- if (WARN_ON(counter_beacon > beacon_data_len))
+ if (!beacon->csa_counter_offsets[0])
goto out;
- if (beacon_data[counter_beacon] == 1)
+ if (WARN_ON_ONCE(beacon->csa_counter_offsets[0] > beacon_data_len))
+ goto out;
+
+ if (beacon_data[beacon->csa_counter_offsets[0]] == 1)
ret = true;
out:
rcu_read_unlock();
@@ -2548,6 +2555,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
bool is_template)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct beacon_data *beacon = NULL;
struct sk_buff *skb = NULL;
struct ieee80211_tx_info *info;
struct ieee80211_sub_if_data *sdata = NULL;
@@ -2569,10 +2577,10 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (sdata->vif.type == NL80211_IFTYPE_AP) {
struct ieee80211_if_ap *ap = &sdata->u.ap;
- struct beacon_data *beacon = rcu_dereference(ap->beacon);
+ beacon = rcu_dereference(ap->beacon);
if (beacon) {
- if (sdata->vif.csa_active) {
+ if (beacon->csa_counter_offsets[0]) {
if (!is_template)
ieee80211_csa_update_counter(vif);
@@ -2613,37 +2621,37 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_hdr *hdr;
- struct beacon_data *presp = rcu_dereference(ifibss->presp);
- if (!presp)
+ beacon = rcu_dereference(ifibss->presp);
+ if (!beacon)
goto out;
- if (sdata->vif.csa_active) {
+ if (beacon->csa_counter_offsets[0]) {
if (!is_template)
ieee80211_csa_update_counter(vif);
- ieee80211_set_csa(sdata, presp);
+ ieee80211_set_csa(sdata, beacon);
}
- skb = dev_alloc_skb(local->tx_headroom + presp->head_len +
+ skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
local->hw.extra_beacon_tailroom);
if (!skb)
goto out;
skb_reserve(skb, local->tx_headroom);
- memcpy(skb_put(skb, presp->head_len), presp->head,
- presp->head_len);
+ memcpy(skb_put(skb, beacon->head_len), beacon->head,
+ beacon->head_len);
hdr = (struct ieee80211_hdr *) skb->data;
hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_BEACON);
} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct beacon_data *bcn = rcu_dereference(ifmsh->beacon);
- if (!bcn)
+ beacon = rcu_dereference(ifmsh->beacon);
+ if (!beacon)
goto out;
- if (sdata->vif.csa_active) {
+ if (beacon->csa_counter_offsets[0]) {
if (!is_template)
/* TODO: For mesh csa_counter is in TU, so
* decrementing it by one isn't correct, but
@@ -2652,40 +2660,42 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
*/
ieee80211_csa_update_counter(vif);
- ieee80211_set_csa(sdata, bcn);
+ ieee80211_set_csa(sdata, beacon);
}
if (ifmsh->sync_ops)
- ifmsh->sync_ops->adjust_tbtt(sdata, bcn);
+ ifmsh->sync_ops->adjust_tbtt(sdata, beacon);
skb = dev_alloc_skb(local->tx_headroom +
- bcn->head_len +
+ beacon->head_len +
256 + /* TIM IE */
- bcn->tail_len +
+ beacon->tail_len +
local->hw.extra_beacon_tailroom);
if (!skb)
goto out;
skb_reserve(skb, local->tx_headroom);
- memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len);
+ memcpy(skb_put(skb, beacon->head_len), beacon->head,
+ beacon->head_len);
ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
if (offs) {
- offs->tim_offset = bcn->head_len;
- offs->tim_length = skb->len - bcn->head_len;
+ offs->tim_offset = beacon->head_len;
+ offs->tim_length = skb->len - beacon->head_len;
}
- memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len);
+ memcpy(skb_put(skb, beacon->tail_len), beacon->tail,
+ beacon->tail_len);
} else {
WARN_ON(1);
goto out;
}
/* CSA offsets */
- if (offs) {
+ if (offs && beacon) {
int i;
for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) {
- u16 csa_off = sdata->csa_counter_offset_beacon[i];
+ u16 csa_off = beacon->csa_counter_offsets[i];
if (!csa_off)
continue;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index a6cda52ed920..3c61060a4d2b 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3,6 +3,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -317,7 +318,8 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
}
static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
- enum queue_stop_reason reason)
+ enum queue_stop_reason reason,
+ bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
@@ -329,7 +331,13 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
if (!test_bit(reason, &local->queue_stop_reasons[queue]))
return;
- __clear_bit(reason, &local->queue_stop_reasons[queue]);
+ if (!refcounted)
+ local->q_stop_reasons[queue][reason] = 0;
+ else
+ local->q_stop_reasons[queue][reason]--;
+
+ if (local->q_stop_reasons[queue][reason] == 0)
+ __clear_bit(reason, &local->queue_stop_reasons[queue]);
if (local->queue_stop_reasons[queue] != 0)
/* someone still has this queue stopped */
@@ -344,25 +352,28 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
}
void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
- enum queue_stop_reason reason)
+ enum queue_stop_reason reason,
+ bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
unsigned long flags;
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- __ieee80211_wake_queue(hw, queue, reason);
+ __ieee80211_wake_queue(hw, queue, reason, refcounted);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue)
{
ieee80211_wake_queue_by_reason(hw, queue,
- IEEE80211_QUEUE_STOP_REASON_DRIVER);
+ IEEE80211_QUEUE_STOP_REASON_DRIVER,
+ false);
}
EXPORT_SYMBOL(ieee80211_wake_queue);
static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
- enum queue_stop_reason reason)
+ enum queue_stop_reason reason,
+ bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
@@ -373,10 +384,13 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
if (WARN_ON(queue >= hw->queues))
return;
- if (test_bit(reason, &local->queue_stop_reasons[queue]))
- return;
+ if (!refcounted)
+ local->q_stop_reasons[queue][reason] = 1;
+ else
+ local->q_stop_reasons[queue][reason]++;
- __set_bit(reason, &local->queue_stop_reasons[queue]);
+ if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
+ return;
if (local->hw.queues < IEEE80211_NUM_ACS)
n_acs = 1;
@@ -398,20 +412,22 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
}
void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
- enum queue_stop_reason reason)
+ enum queue_stop_reason reason,
+ bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
unsigned long flags;
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- __ieee80211_stop_queue(hw, queue, reason);
+ __ieee80211_stop_queue(hw, queue, reason, refcounted);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue)
{
ieee80211_stop_queue_by_reason(hw, queue,
- IEEE80211_QUEUE_STOP_REASON_DRIVER);
+ IEEE80211_QUEUE_STOP_REASON_DRIVER,
+ false);
}
EXPORT_SYMBOL(ieee80211_stop_queue);
@@ -429,9 +445,11 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local,
}
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+ __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
+ false);
__skb_queue_tail(&local->pending[queue], skb);
- __ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+ __ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
+ false);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
@@ -455,20 +473,23 @@ void ieee80211_add_pending_skbs(struct ieee80211_local *local,
queue = info->hw_queue;
__ieee80211_stop_queue(hw, queue,
- IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+ IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
+ false);
__skb_queue_tail(&local->pending[queue], skb);
}
for (i = 0; i < hw->queues; i++)
__ieee80211_wake_queue(hw, i,
- IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+ IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
+ false);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
unsigned long queues,
- enum queue_stop_reason reason)
+ enum queue_stop_reason reason,
+ bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
unsigned long flags;
@@ -477,7 +498,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for_each_set_bit(i, &queues, hw->queues)
- __ieee80211_stop_queue(hw, i, reason);
+ __ieee80211_stop_queue(hw, i, reason, refcounted);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
@@ -485,7 +506,8 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
void ieee80211_stop_queues(struct ieee80211_hw *hw)
{
ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_DRIVER);
+ IEEE80211_QUEUE_STOP_REASON_DRIVER,
+ false);
}
EXPORT_SYMBOL(ieee80211_stop_queues);
@@ -508,7 +530,8 @@ EXPORT_SYMBOL(ieee80211_queue_stopped);
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
unsigned long queues,
- enum queue_stop_reason reason)
+ enum queue_stop_reason reason,
+ bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
unsigned long flags;
@@ -517,7 +540,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for_each_set_bit(i, &queues, hw->queues)
- __ieee80211_wake_queue(hw, i, reason);
+ __ieee80211_wake_queue(hw, i, reason, refcounted);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
@@ -525,17 +548,16 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
void ieee80211_wake_queues(struct ieee80211_hw *hw)
{
ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_DRIVER);
+ IEEE80211_QUEUE_STOP_REASON_DRIVER,
+ false);
}
EXPORT_SYMBOL(ieee80211_wake_queues);
-void ieee80211_flush_queues(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+static unsigned int
+ieee80211_get_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
{
- u32 queues;
-
- if (!local->ops->flush)
- return;
+ unsigned int queues;
if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
int ac;
@@ -551,13 +573,46 @@ void ieee80211_flush_queues(struct ieee80211_local *local,
queues = BIT(local->hw.queues) - 1;
}
- ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_FLUSH);
+ return queues;
+}
+
+void ieee80211_flush_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ unsigned int queues;
+
+ if (!local->ops->flush)
+ return;
+
+ queues = ieee80211_get_vif_queues(local, sdata);
+
+ ieee80211_stop_queues_by_reason(&local->hw, queues,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH,
+ false);
drv_flush(local, sdata, queues, false);
- ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_FLUSH);
+ ieee80211_wake_queues_by_reason(&local->hw, queues,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH,
+ false);
+}
+
+void ieee80211_stop_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason)
+{
+ ieee80211_stop_queues_by_reason(&local->hw,
+ ieee80211_get_vif_queues(local, sdata),
+ reason, true);
+}
+
+void ieee80211_wake_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason)
+{
+ ieee80211_wake_queues_by_reason(&local->hw,
+ ieee80211_get_vif_queues(local, sdata),
+ reason, true);
}
static void __iterate_active_interfaces(struct ieee80211_local *local,
@@ -960,6 +1015,31 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
}
elems->pwr_constr_elem = pos;
break;
+ case WLAN_EID_CISCO_VENDOR_SPECIFIC:
+ /* Lots of different options exist, but we only care
+ * about the Dynamic Transmit Power Control element.
+ * First check for the Cisco OUI, then for the DTPC
+ * tag (0x00).
+ */
+ if (elen < 4) {
+ elem_parse_failed = true;
+ break;
+ }
+
+ if (pos[0] != 0x00 || pos[1] != 0x40 ||
+ pos[2] != 0x96 || pos[3] != 0x00)
+ break;
+
+ if (elen != 6) {
+ elem_parse_failed = true;
+ break;
+ }
+
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
+
+ elems->cisco_dtpc_elem = pos;
+ break;
case WLAN_EID_TIMEOUT_INTERVAL:
if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
elems->timeout_int = (void *)pos;
@@ -1166,14 +1246,17 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
}
}
-int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
- size_t buffer_len, const u8 *ie, size_t ie_len,
- enum ieee80211_band band, u32 rate_mask,
- struct cfg80211_chan_def *chandef)
+static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
+ u8 *buffer, size_t buffer_len,
+ const u8 *ie, size_t ie_len,
+ enum ieee80211_band band,
+ u32 rate_mask,
+ struct cfg80211_chan_def *chandef,
+ size_t *offset)
{
struct ieee80211_supported_band *sband;
u8 *pos = buffer, *end = buffer + buffer_len;
- size_t offset = 0, noffset;
+ size_t noffset;
int supp_rates_len, i;
u8 rates[32];
int num_rates;
@@ -1181,6 +1264,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
int shift;
u32 rate_flags;
+ *offset = 0;
+
sband = local->hw.wiphy->bands[band];
if (WARN_ON_ONCE(!sband))
return 0;
@@ -1219,12 +1304,12 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
noffset = ieee80211_ie_split(ie, ie_len,
before_extrates,
ARRAY_SIZE(before_extrates),
- offset);
- if (end - pos < noffset - offset)
+ *offset);
+ if (end - pos < noffset - *offset)
goto out_err;
- memcpy(pos, ie + offset, noffset - offset);
- pos += noffset - offset;
- offset = noffset;
+ memcpy(pos, ie + *offset, noffset - *offset);
+ pos += noffset - *offset;
+ *offset = noffset;
}
ext_rates_len = num_rates - supp_rates_len;
@@ -1258,12 +1343,12 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
};
noffset = ieee80211_ie_split(ie, ie_len,
before_ht, ARRAY_SIZE(before_ht),
- offset);
- if (end - pos < noffset - offset)
+ *offset);
+ if (end - pos < noffset - *offset)
goto out_err;
- memcpy(pos, ie + offset, noffset - offset);
- pos += noffset - offset;
- offset = noffset;
+ memcpy(pos, ie + *offset, noffset - *offset);
+ pos += noffset - *offset;
+ *offset = noffset;
}
if (sband->ht_cap.ht_supported) {
@@ -1298,12 +1383,12 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
};
noffset = ieee80211_ie_split(ie, ie_len,
before_vht, ARRAY_SIZE(before_vht),
- offset);
- if (end - pos < noffset - offset)
+ *offset);
+ if (end - pos < noffset - *offset)
goto out_err;
- memcpy(pos, ie + offset, noffset - offset);
- pos += noffset - offset;
- offset = noffset;
+ memcpy(pos, ie + *offset, noffset - *offset);
+ pos += noffset - *offset;
+ *offset = noffset;
}
if (sband->vht_cap.vht_supported) {
@@ -1313,21 +1398,54 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
sband->vht_cap.cap);
}
- /* add any remaining custom IEs */
- if (ie && ie_len) {
- noffset = ie_len;
- if (end - pos < noffset - offset)
- goto out_err;
- memcpy(pos, ie + offset, noffset - offset);
- pos += noffset - offset;
- }
-
return pos - buffer;
out_err:
WARN_ONCE(1, "not enough space for preq IEs\n");
return pos - buffer;
}
+int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
+ size_t buffer_len,
+ struct ieee80211_scan_ies *ie_desc,
+ const u8 *ie, size_t ie_len,
+ u8 bands_used, u32 *rate_masks,
+ struct cfg80211_chan_def *chandef)
+{
+ size_t pos = 0, old_pos = 0, custom_ie_offset = 0;
+ int i;
+
+ memset(ie_desc, 0, sizeof(*ie_desc));
+
+ for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ if (bands_used & BIT(i)) {
+ pos += ieee80211_build_preq_ies_band(local,
+ buffer + pos,
+ buffer_len - pos,
+ ie, ie_len, i,
+ rate_masks[i],
+ chandef,
+ &custom_ie_offset);
+ ie_desc->ies[i] = buffer + old_pos;
+ ie_desc->len[i] = pos - old_pos;
+ old_pos = pos;
+ }
+ }
+
+ /* add any remaining custom IEs */
+ if (ie && ie_len) {
+ if (WARN_ONCE(buffer_len - pos < ie_len - custom_ie_offset,
+ "not enough space for preq custom IEs\n"))
+ return pos;
+ memcpy(buffer + pos, ie + custom_ie_offset,
+ ie_len - custom_ie_offset);
+ ie_desc->common_ies = buffer + pos;
+ ie_desc->common_ie_len = ie_len - custom_ie_offset;
+ pos += ie_len - custom_ie_offset;
+ }
+
+ return pos;
+};
+
struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
u8 *dst, u32 ratemask,
struct ieee80211_channel *chan,
@@ -1340,6 +1458,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
int ies_len;
+ u32 rate_masks[IEEE80211_NUM_BANDS] = {};
+ struct ieee80211_scan_ies dummy_ie_desc;
/*
* Do not send DS Channel parameter for directed probe requests
@@ -1357,10 +1477,11 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
if (!skb)
return NULL;
+ rate_masks[chan->band] = ratemask;
ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
- skb_tailroom(skb),
- ie, ie_len, chan->band,
- ratemask, &chandef);
+ skb_tailroom(skb), &dummy_ie_desc,
+ ie, ie_len, BIT(chan->band),
+ rate_masks, &chandef);
skb_put(skb, ies_len);
if (dst) {
@@ -1604,7 +1725,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (local->use_chanctx) {
mutex_lock(&local->chanctx_mtx);
list_for_each_entry(ctx, &local->chanctx_list, list)
- WARN_ON(drv_add_chanctx(local, ctx));
+ if (ctx->replace_state !=
+ IEEE80211_CHANCTX_REPLACES_OTHER)
+ WARN_ON(drv_add_chanctx(local, ctx));
mutex_unlock(&local->chanctx_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
@@ -1798,7 +1921,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND,
+ false);
/*
* Reconfigure sched scan if it was interrupted by FW restart or
@@ -2836,6 +2960,35 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
ps->dtim_count = dtim_count;
}
+static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
+{
+ struct ieee80211_sub_if_data *sdata;
+ u8 radar_detect = 0;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ if (WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED))
+ return 0;
+
+ list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list)
+ if (sdata->reserved_radar_required)
+ radar_detect |= BIT(sdata->reserved_chandef.width);
+
+ /*
+ * An in-place reservation context should not have any assigned vifs
+ * until it replaces the other context.
+ */
+ WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER &&
+ !list_empty(&ctx->assigned_vifs));
+
+ list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list)
+ if (sdata->radar_required)
+ radar_detect |= BIT(sdata->vif.bss_conf.chandef.width);
+
+ return radar_detect;
+}
+
int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
enum ieee80211_chanctx_mode chanmode,
@@ -2877,8 +3030,9 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
num[iftype] = 1;
list_for_each_entry(ctx, &local->chanctx_list, list) {
- if (ctx->conf.radar_enabled)
- radar_detect |= BIT(ctx->conf.def.width);
+ if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
+ continue;
+ radar_detect |= ieee80211_chanctx_radar_detect(local, ctx);
if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
num_different_channels++;
continue;
@@ -2935,10 +3089,12 @@ int ieee80211_max_num_channels(struct ieee80211_local *local)
lockdep_assert_held(&local->chanctx_mtx);
list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
+ continue;
+
num_different_channels++;
- if (ctx->conf.radar_enabled)
- radar_detect |= BIT(ctx->conf.def.width);
+ radar_detect |= ieee80211_chanctx_radar_detect(local, ctx);
}
list_for_each_entry_rcu(sdata, &local->interfaces, list)
@@ -2953,3 +3109,18 @@ int ieee80211_max_num_channels(struct ieee80211_local *local)
return max_num_different_channels;
}
+
+u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo)
+{
+ *buf++ = WLAN_EID_VENDOR_SPECIFIC;
+ *buf++ = 7; /* len */
+ *buf++ = 0x00; /* Microsoft OUI 00:50:F2 */
+ *buf++ = 0x50;
+ *buf++ = 0xf2;
+ *buf++ = 2; /* WME */
+ *buf++ = 0; /* WME info */
+ *buf++ = 1; /* WME ver */
+ *buf++ = qosinfo; /* U-APSD no in use */
+
+ return buf;
+}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 9265adfdabfc..671ce0d27a80 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -129,6 +129,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
if (!vht_cap_ie || !sband->vht_cap.vht_supported)
return;
+ /* don't support VHT for TDLS peers for now */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ return;
+
/*
* A VHT STA must support 40 MHz, but if we verify that here
* then we break a few things - some APs (e.g. Netgear R6300v2
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 6ee2b5863572..9181fb6d6437 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -271,22 +271,6 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
return ret;
}
-
-static bool ieee80211_wep_is_weak_iv(struct sk_buff *skb,
- struct ieee80211_key *key)
-{
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- unsigned int hdrlen;
- u8 *ivpos;
- u32 iv;
-
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
- ivpos = skb->data + hdrlen;
- iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2];
-
- return ieee80211_wep_weak_iv(iv, key->conf.keylen);
-}
-
ieee80211_rx_result
ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
{
@@ -301,16 +285,12 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
if (!(status->flag & RX_FLAG_DECRYPTED)) {
if (skb_linearize(rx->skb))
return RX_DROP_UNUSABLE;
- if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key))
- rx->sta->wep_weak_iv_count++;
if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key))
return RX_DROP_UNUSABLE;
} else if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) +
IEEE80211_WEP_IV_LEN))
return RX_DROP_UNUSABLE;
- if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key))
- rx->sta->wep_weak_iv_count++;
ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
/* remove ICV */
if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index d51422c778de..3b873989992c 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -1,5 +1,6 @@
/*
* Copyright 2004, Instant802 Networks, Inc.
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -118,7 +119,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_AP_VLAN:
sta = rcu_dereference(sdata->u.vlan.sta);
if (sta) {
- qos = test_sta_flag(sta, WLAN_STA_WME);
+ qos = sta->sta.wme;
break;
}
case NL80211_IFTYPE_AP:
@@ -145,7 +146,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
if (!sta && ra && !is_multicast_ether_addr(ra)) {
sta = sta_info_get(sdata, ra);
if (sta)
- qos = test_sta_flag(sta, WLAN_STA_WME);
+ qos = sta->sta.wme;
}
rcu_read_unlock();
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 9b3dcc201145..983527a4c1ab 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -64,8 +64,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
if (!info->control.hw_key)
tail += IEEE80211_TKIP_ICV_LEN;
- if (WARN_ON(skb_tailroom(skb) < tail ||
- skb_headroom(skb) < IEEE80211_TKIP_IV_LEN))
+ if (WARN(skb_tailroom(skb) < tail ||
+ skb_headroom(skb) < IEEE80211_TKIP_IV_LEN,
+ "mmic: not enough head/tail (%d/%d,%d/%d)\n",
+ skb_headroom(skb), IEEE80211_TKIP_IV_LEN,
+ skb_tailroom(skb), tail))
return TX_DROP;
key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
@@ -811,7 +814,7 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx)
ieee80211_rx_result
ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx)
{
- if (rx->sta->cipher_scheme)
+ if (rx->sta && rx->sta->cipher_scheme)
return ieee80211_crypto_cs_decrypt(rx);
return RX_DROP_UNUSABLE;
diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c
index 2cf66d885e68..b36b2b996578 100644
--- a/net/mac802154/ieee802154_dev.c
+++ b/net/mac802154/ieee802154_dev.c
@@ -143,6 +143,7 @@ static void
mac802154_del_iface(struct wpan_phy *phy, struct net_device *dev)
{
struct mac802154_sub_if_data *sdata;
+
ASSERT_RTNL();
sdata = netdev_priv(dev);
@@ -166,11 +167,13 @@ mac802154_add_iface(struct wpan_phy *phy, const char *name, int type)
switch (type) {
case IEEE802154_DEV_MONITOR:
dev = alloc_netdev(sizeof(struct mac802154_sub_if_data),
- name, mac802154_monitor_setup);
+ name, NET_NAME_UNKNOWN,
+ mac802154_monitor_setup);
break;
case IEEE802154_DEV_WPAN:
dev = alloc_netdev(sizeof(struct mac802154_sub_if_data),
- name, mac802154_wpan_setup);
+ name, NET_NAME_UNKNOWN,
+ mac802154_wpan_setup);
break;
default:
dev = NULL;
@@ -276,7 +279,8 @@ ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops)
}
priv = wpan_phy_priv(phy);
- priv->hw.phy = priv->phy = phy;
+ priv->phy = phy;
+ priv->hw.phy = priv->phy;
priv->hw.priv = (char *)priv + ALIGN(sizeof(*priv), NETDEV_ALIGN);
priv->ops = ops;
@@ -302,29 +306,61 @@ EXPORT_SYMBOL(ieee802154_free_device);
int ieee802154_register_device(struct ieee802154_dev *dev)
{
struct mac802154_priv *priv = mac802154_to_priv(dev);
- int rc = -ENOMEM;
+ int rc = -ENOSYS;
+
+ if (dev->flags & IEEE802154_HW_TXPOWER) {
+ if (!priv->ops->set_txpower)
+ goto out;
+
+ priv->phy->set_txpower = mac802154_set_txpower;
+ }
+
+ if (dev->flags & IEEE802154_HW_LBT) {
+ if (!priv->ops->set_lbt)
+ goto out;
+
+ priv->phy->set_lbt = mac802154_set_lbt;
+ }
+
+ if (dev->flags & IEEE802154_HW_CCA_MODE) {
+ if (!priv->ops->set_cca_mode)
+ goto out;
+
+ priv->phy->set_cca_mode = mac802154_set_cca_mode;
+ }
+
+ if (dev->flags & IEEE802154_HW_CCA_ED_LEVEL) {
+ if (!priv->ops->set_cca_ed_level)
+ goto out;
+
+ priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level;
+ }
+
+ if (dev->flags & IEEE802154_HW_CSMA_PARAMS) {
+ if (!priv->ops->set_csma_params)
+ goto out;
+
+ priv->phy->set_csma_params = mac802154_set_csma_params;
+ }
+
+ if (dev->flags & IEEE802154_HW_FRAME_RETRIES) {
+ if (!priv->ops->set_frame_retries)
+ goto out;
+
+ priv->phy->set_frame_retries = mac802154_set_frame_retries;
+ }
priv->dev_workqueue =
create_singlethread_workqueue(wpan_phy_name(priv->phy));
- if (!priv->dev_workqueue)
+ if (!priv->dev_workqueue) {
+ rc = -ENOMEM;
goto out;
+ }
wpan_phy_set_dev(priv->phy, priv->hw.parent);
priv->phy->add_iface = mac802154_add_iface;
priv->phy->del_iface = mac802154_del_iface;
- if (priv->ops->set_txpower)
- priv->phy->set_txpower = mac802154_set_txpower;
- if (priv->ops->set_lbt)
- priv->phy->set_lbt = mac802154_set_lbt;
- if (priv->ops->set_cca_mode)
- priv->phy->set_cca_mode = mac802154_set_cca_mode;
- if (priv->ops->set_cca_ed_level)
- priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level;
- if (priv->ops->set_csma_params)
- priv->phy->set_csma_params = mac802154_set_csma_params;
- if (priv->ops->set_frame_retries)
- priv->phy->set_frame_retries = mac802154_set_frame_retries;
rc = wpan_phy_register(priv->phy);
if (rc < 0)
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 1456f73b02b9..457058142098 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -538,6 +538,7 @@ static int llsec_recover_addr(struct mac802154_llsec *sec,
struct ieee802154_addr *addr)
{
__le16 caddr = sec->params.coord_shortaddr;
+
addr->pan_id = sec->params.pan_id;
if (caddr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) {
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 15aa2f2b03a7..868a040fd422 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -175,9 +175,9 @@ static void phy_chan_notify(struct work_struct *work)
mutex_lock(&priv->hw->phy->pib_lock);
res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan);
- if (res)
+ if (res) {
pr_debug("set_channel failed\n");
- else {
+ } else {
priv->hw->phy->current_channel = priv->chan;
priv->hw->phy->current_page = priv->page;
}
@@ -210,8 +210,9 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
INIT_WORK(&work->work, phy_chan_notify);
work->dev = dev;
queue_work(priv->hw->dev_workqueue, &work->work);
- } else
+ } else {
mutex_unlock(&priv->hw->phy->pib_lock);
+ }
}
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 7f820a108a9c..a14cf9ede171 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -86,9 +86,8 @@ fail:
static void mac802154_rx_worker(struct work_struct *work)
{
struct rx_work *rw = container_of(work, struct rx_work, work);
- struct sk_buff *skb = rw->skb;
- mac802154_subif_rx(rw->dev, skb, rw->lqi);
+ mac802154_subif_rx(rw->dev, rw->skb, rw->lqi);
kfree(rw);
}
@@ -101,7 +100,7 @@ ieee802154_rx_irqsafe(struct ieee802154_dev *dev, struct sk_buff *skb, u8 lqi)
if (!skb)
return;
- work = kzalloc(sizeof(struct rx_work), GFP_ATOMIC);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 6d1647399d4f..fdf4c0e67259 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -89,8 +89,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
if (!(priv->phy->channels_supported[page] & (1 << chan))) {
WARN_ON(1);
- kfree_skb(skb);
- return NETDEV_TX_OK;
+ goto err_tx;
}
mac802154_monitors_rx(mac802154_to_priv(&priv->hw), skb);
@@ -98,16 +97,15 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) {
u16 crc = crc_ccitt(0, skb->data, skb->len);
u8 *data = skb_put(skb, 2);
+
data[0] = crc & 0xff;
data[1] = crc >> 8;
}
- if (skb_cow_head(skb, priv->hw.extra_tx_headroom)) {
- kfree_skb(skb);
- return NETDEV_TX_OK;
- }
+ if (skb_cow_head(skb, priv->hw.extra_tx_headroom))
+ goto err_tx;
- work = kzalloc(sizeof(struct xmit_work), GFP_ATOMIC);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
kfree_skb(skb);
return NETDEV_TX_BUSY;
@@ -128,4 +126,8 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
queue_work(priv->dev_workqueue, &work->work);
return NETDEV_TX_OK;
+
+err_tx:
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 3c3069fd6971..4ab86a57dca5 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -90,7 +90,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
case SIOCSIFADDR:
dev_warn(&dev->dev,
- "Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n");
+ "Using DEBUGing ioctl SIOCSIFADDR isn't recommended!\n");
if (sa->family != AF_IEEE802154 ||
sa->addr.addr_type != IEEE802154_ADDR_SHORT ||
sa->addr.pan_id == IEEE802154_PANID_BROADCAST ||
@@ -462,7 +462,10 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
skb->pkt_type = PACKET_OTHERHOST;
break;
default:
- break;
+ spin_unlock_bh(&sdata->mib_lock);
+ pr_debug("invalid dest mode\n");
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
spin_unlock_bh(&sdata->mib_lock);
@@ -472,8 +475,7 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
rc = mac802154_llsec_decrypt(&sdata->sec, skb);
if (rc) {
pr_debug("decryption failed: %i\n", rc);
- kfree_skb(skb);
- return NET_RX_DROP;
+ goto fail;
}
sdata->dev->stats.rx_packets++;
@@ -485,9 +487,12 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
default:
pr_warn("ieee802154: bad frame received (type = %d)\n",
mac_cb(skb)->type);
- kfree_skb(skb);
- return NET_RX_DROP;
+ goto fail;
}
+
+fail:
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
static void mac802154_print_addr(const char *name,
@@ -573,6 +578,7 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
ret = mac802154_parse_frame_start(skb, &hdr);
if (ret) {
pr_debug("got invalid frame\n");
+ kfree_skb(skb);
return;
}
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 6b38d083e1c9..e28ed2ef5b06 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -65,15 +65,9 @@ out:
return segs;
}
-static int mpls_gso_send_check(struct sk_buff *skb)
-{
- return 0;
-}
-
static struct packet_offload mpls_mc_offload = {
.type = cpu_to_be16(ETH_P_MPLS_MC),
.callbacks = {
- .gso_send_check = mpls_gso_send_check,
.gso_segment = mpls_gso_segment,
},
};
@@ -81,7 +75,6 @@ static struct packet_offload mpls_mc_offload = {
static struct packet_offload mpls_uc_offload = {
.type = cpu_to_be16(ETH_P_MPLS_UC),
.callbacks = {
- .gso_send_check = mpls_gso_send_check,
.gso_segment = mpls_gso_segment,
},
};
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e9410d17619d..ae5096ab65eb 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -46,6 +46,9 @@ config NF_CONNTRACK
To compile it as a module, choose M here. If unsure, say N.
+config NF_LOG_COMMON
+ tristate
+
if NF_CONNTRACK
config NF_CONNTRACK_MARK
@@ -493,10 +496,19 @@ config NFT_LIMIT
This option adds the "limit" expression that you can use to
ratelimit rule matchings.
-config NFT_NAT
+config NFT_MASQ
depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
+ tristate "Netfilter nf_tables masquerade support"
+ help
+ This option adds the "masquerade" expression that you can use
+ to perform NAT in the masquerade flavour.
+
+config NFT_NAT
+ depends on NF_TABLES
+ depends on NF_CONNTRACK
+ select NF_NAT
tristate "Netfilter nf_tables nat module"
help
This option adds the "nat" expression that you can use to perform
@@ -744,6 +756,9 @@ config NETFILTER_XT_TARGET_LED
config NETFILTER_XT_TARGET_LOG
tristate "LOG target support"
+ select NF_LOG_COMMON
+ select NF_LOG_IPV4
+ select NF_LOG_IPV6 if IPV6
default m if NETFILTER_ADVANCED=n
help
This option adds a `LOG' target, which allows you to create rules in
@@ -760,6 +775,14 @@ config NETFILTER_XT_TARGET_MARK
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
+config NETFILTER_XT_NAT
+ tristate '"SNAT and DNAT" targets support'
+ depends on NF_NAT
+ ---help---
+ This option enables the SNAT and DNAT targets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_NETMAP
tristate '"NETMAP" target support'
depends on NF_NAT
@@ -833,6 +856,7 @@ config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
+ depends on (IPV6 || IPV6=n)
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index bffdad774da7..a9571be3f791 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -47,6 +47,9 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
+# generic transport layer logging
+obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
+
obj-$(CONFIG_NF_NAT) += nf_nat.o
# NAT protocols (nf_nat)
@@ -84,6 +87,7 @@ obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
+obj-$(CONFIG_NFT_MASQ) += nft_masq.o
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
@@ -92,7 +96,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
-obj-$(CONFIG_NF_NAT) += xt_nat.o
+obj-$(CONFIG_NETFILTER_XT_NAT) += xt_nat.o
# targets
obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 1fbab0cdd302..024a2e25c8a4 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -35,11 +35,7 @@ EXPORT_SYMBOL_GPL(nf_ipv6_ops);
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
- int err;
-
- err = mutex_lock_interruptible(&afinfo_mutex);
- if (err < 0)
- return err;
+ mutex_lock(&afinfo_mutex);
RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
mutex_unlock(&afinfo_mutex);
return 0;
@@ -58,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif
@@ -68,18 +64,15 @@ static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg)
{
struct nf_hook_ops *elem;
- int err;
- err = mutex_lock_interruptible(&nf_hook_mutex);
- if (err < 0)
- return err;
+ mutex_lock(&nf_hook_mutex);
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
if (reg->priority < elem->priority)
break;
}
list_add_rcu(&reg->list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
return 0;
@@ -91,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list);
mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 2f7f5c32c6f9..234a8ec82076 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_MAC
+ tristate "hash:mac set support"
+ depends on IP_SET
+ help
+ This option adds the hash:mac set type support, by which
+ one can store MAC (ethernet address) elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_NETPORTNET
tristate "hash:net,port,net set support"
depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 231f10196cb9..3dbd5e958489 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
+obj-$(CONFIG_IP_SET_HASH_MAC) += ip_set_hash_mac.o
obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index f2c7d83dc23f..6f024a8a1534 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -128,6 +128,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
return 0;
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
return 1;
}
@@ -161,6 +163,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_counter(ext_counter(x, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(x, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 6f1f9f494808..55b083ec587a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -112,7 +113,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ip_adt_elem e = { };
+ struct bitmap_ip_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -132,14 +133,17 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
u32 ip = 0, ip_to = 0;
- struct bitmap_ip_adt_elem e = { };
+ struct bitmap_ip_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -357,6 +361,9 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 740eabededd9..86104744b00f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -203,7 +204,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = {};
+ struct bitmap_ipmac_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -232,7 +233,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = {};
+ struct bitmap_ipmac_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0;
int ret = 0;
@@ -240,7 +241,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -394,6 +398,9 @@ static struct ip_set_type bitmap_ipmac_type = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index cf99676e69f8..005dd36444c3 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -22,7 +22,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -104,7 +105,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_port_adt_elem e = {};
+ struct bitmap_port_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
__be16 __port;
u16 port = 0;
@@ -129,7 +130,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_port_adt_elem e = {};
+ struct bitmap_port_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port; /* wraparound */
u16 port_to;
@@ -139,7 +140,10 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -291,6 +295,9 @@ static struct ip_set_type bitmap_port_type = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index ec8114fae50b..912e5a05b79d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -101,7 +101,7 @@ load_settype(const char *name)
nfnl_unlock(NFNL_SUBSYS_IPSET);
pr_debug("try to load ip_set_%s\n", name);
if (request_module("ip_set_%s", name) < 0) {
- pr_warning("Can't find ip_set type %s\n", name);
+ pr_warn("Can't find ip_set type %s\n", name);
nfnl_lock(NFNL_SUBSYS_IPSET);
return false;
}
@@ -195,20 +195,19 @@ ip_set_type_register(struct ip_set_type *type)
int ret = 0;
if (type->protocol != IPSET_PROTOCOL) {
- pr_warning("ip_set type %s, family %s, revision %u:%u uses "
- "wrong protocol version %u (want %u)\n",
- type->name, family_name(type->family),
- type->revision_min, type->revision_max,
- type->protocol, IPSET_PROTOCOL);
+ pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n",
+ type->name, family_name(type->family),
+ type->revision_min, type->revision_max,
+ type->protocol, IPSET_PROTOCOL);
return -EINVAL;
}
ip_set_type_lock();
if (find_set_type(type->name, type->family, type->revision_min)) {
/* Duplicate! */
- pr_warning("ip_set type %s, family %s with revision min %u "
- "already registered!\n", type->name,
- family_name(type->family), type->revision_min);
+ pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
+ type->name, family_name(type->family),
+ type->revision_min);
ret = -EINVAL;
goto unlock;
}
@@ -228,9 +227,9 @@ ip_set_type_unregister(struct ip_set_type *type)
{
ip_set_type_lock();
if (!find_set_type(type->name, type->family, type->revision_min)) {
- pr_warning("ip_set type %s, family %s with revision min %u "
- "not registered\n", type->name,
- family_name(type->family), type->revision_min);
+ pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
+ type->name, family_name(type->family),
+ type->revision_min);
goto unlock;
}
list_del_rcu(&type->list);
@@ -338,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
.len = sizeof(unsigned long),
.align = __alignof__(unsigned long),
},
+ [IPSET_EXT_ID_SKBINFO] = {
+ .type = IPSET_EXT_SKBINFO,
+ .flag = IPSET_FLAG_WITH_SKBINFO,
+ .len = sizeof(struct ip_set_skbinfo),
+ .align = __alignof__(struct ip_set_skbinfo),
+ },
[IPSET_EXT_ID_COMMENT] = {
.type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
.flag = IPSET_FLAG_WITH_COMMENT,
@@ -383,6 +388,7 @@ int
ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext)
{
+ u64 fullmark;
if (tb[IPSET_ATTR_TIMEOUT]) {
if (!(set->extensions & IPSET_EXT_TIMEOUT))
return -IPSET_ERR_TIMEOUT;
@@ -403,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_COMMENT;
ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
}
-
+ if (tb[IPSET_ATTR_SKBMARK]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+ ext->skbmark = fullmark >> 32;
+ ext->skbmarkmask = fullmark & 0xffffffff;
+ }
+ if (tb[IPSET_ATTR_SKBPRIO]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbprio = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBPRIO]));
+ }
+ if (tb[IPSET_ATTR_SKBQUEUE]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbqueue = be16_to_cpu(nla_get_be16(
+ tb[IPSET_ATTR_SKBQUEUE]));
+ }
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
@@ -478,7 +502,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
if (ret == -EAGAIN) {
/* Type requests element to be completed */
- pr_debug("element must be competed, ADD is triggered\n");
+ pr_debug("element must be completed, ADD is triggered\n");
write_lock_bh(&set->lock);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
write_unlock_bh(&set->lock);
@@ -1398,7 +1422,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
struct sk_buff *skb2;
struct nlmsgerr *errmsg;
- size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
+ size_t payload = min(SIZE_MAX,
+ sizeof(*errmsg) + nlmsg_len(nlh));
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
struct nlattr *cmdattr;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 61c7fb052802..fee7c64e4dd1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -565,8 +565,8 @@ retry:
set->name, orig->htable_bits, htable_bits, orig);
if (!htable_bits) {
/* In case we have plenty of memory :-) */
- pr_warning("Cannot increase the hashsize of set %s further\n",
- set->name);
+ pr_warn("Cannot increase the hashsize of set %s further\n",
+ set->name);
return -IPSET_ERR_HASH_FULL;
}
t = ip_set_alloc(sizeof(*t)
@@ -651,8 +651,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (h->elements >= h->maxelem) {
if (net_ratelimit())
- pr_warning("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
+ pr_warn("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
return -IPSET_ERR_HASH_FULL;
}
@@ -720,6 +720,8 @@ reuse_slot:
ip_set_init_counter(ext_counter(data, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(data, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
out:
rcu_read_unlock_bh();
@@ -797,6 +799,9 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(data, set),
ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(data, set),
+ ext, mext, flags);
return mtype_do_data_match(data);
}
@@ -998,8 +1003,8 @@ mtype_list(const struct ip_set *set,
nla_put_failure:
nlmsg_trim(skb, incomplete);
if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
- pr_warning("Can't list set %s: one bucket does not fit into "
- "a message. Please report it!\n", set->name);
+ pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
+ set->name);
cb->args[IPSET_CB_ARG0] = 0;
return -EMSGSIZE;
}
@@ -1049,8 +1054,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
struct HTYPE *h;
struct htable *t;
+#ifndef IP_SET_PROTO_UNDEF
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
+#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
markmask = 0xffffffff;
@@ -1093,7 +1100,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (tb[IPSET_ATTR_MARKMASK]) {
markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
- if ((markmask > 4294967295u) || markmask == 0)
+ if (markmask == 0)
return -IPSET_ERR_INVALID_MARKMASK;
}
#endif
@@ -1132,25 +1139,32 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
rcu_assign_pointer(h->table, t);
set->data = h;
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
+#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
+#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
+#endif
IPSET_TOKEN(HTYPE, 4_gc_init)(set,
IPSET_TOKEN(HTYPE, 4_gc));
+#ifndef IP_SET_PROTO_UNDEF
else
IPSET_TOKEN(HTYPE, 6_gc_init)(set,
IPSET_TOKEN(HTYPE, 6_gc));
+#endif
}
-
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
set->name, jhash_size(t->htable_bits),
t->htable_bits, h->maxelem, set->data, t);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index dd40607f878e..76959d79e9d1 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -26,7 +26,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */
/* 2 Comments support */
-#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
+/* 3 Forceadd support */
+#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -84,7 +85,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip4_elem e = {};
+ struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
__be32 ip;
@@ -103,7 +104,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip4_elem e = {};
+ struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, hosts;
int ret = 0;
@@ -111,7 +112,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -222,7 +226,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip6_elem e = {};
+ struct hash_ip6_elem e = { { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
@@ -239,7 +243,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip6_elem e = {};
+ struct hash_ip6_elem e = { { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
@@ -247,6 +251,9 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -295,6 +302,9 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 4eff0a297254..7abf9788cfa8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -25,7 +25,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
+/* 1 Forceadd support */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
@@ -113,7 +114,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +248,9 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -301,6 +308,9 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 7597b82a8b03..dcbcceb9a52f 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -28,7 +28,8 @@
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -94,7 +95,7 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport4_elem e = { };
+ struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -111,7 +112,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport4_elem e = { };
+ struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip, ip_to = 0, p = 0, port, port_to;
bool with_ports = false;
@@ -122,7 +123,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -258,7 +262,7 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport6_elem e = { };
+ struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -275,7 +279,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport6_elem e = { };
+ struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
@@ -287,6 +291,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -370,6 +377,9 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 672655ffd573..7ef93fc887a1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -28,7 +28,8 @@
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -95,7 +96,7 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip4_elem e = { };
+ struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -113,7 +114,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip4_elem e = { };
+ struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip, ip_to = 0, p = 0, port, port_to;
bool with_ports = false;
@@ -124,7 +125,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -265,7 +269,7 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip6_elem e = { };
+ struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -283,7 +287,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip6_elem e = { };
+ struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
@@ -295,6 +299,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -382,6 +389,9 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7308d84f9277..b6012ad92781 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -30,7 +30,8 @@
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -179,7 +180,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -432,6 +436,9 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -541,6 +548,9 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
new file mode 100644
index 000000000000..65690b52a4d5
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -0,0 +1,173 @@
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:mac");
+
+/* Type specific function prefix */
+#define HTYPE hash_mac
+
+/* Member elements */
+struct hash_mac4_elem {
+ /* Zero valued IP addresses cannot be stored */
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_mac4_data_equal(const struct hash_mac4_elem *e1,
+ const struct hash_mac4_elem *e2,
+ u32 *multi)
+{
+ return ether_addr_equal(e1->ether, e2->ether);
+}
+
+static inline bool
+hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
+{
+ return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+}
+
+static inline void
+hash_mac4_data_next(struct hash_mac4_elem *next,
+ const struct hash_mac4_elem *e)
+{
+}
+
+#define MTYPE hash_mac4
+#define PF 4
+#define HOST_MASK 32
+#define IP_SET_EMIT_CREATE
+#define IP_SET_PROTO_UNDEF
+#include "ip_set_hash_gen.h"
+
+/* Zero valued element is not supported */
+static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
+
+static int
+hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ /* MAC can be src only */
+ if (!(opt->flags & IPSET_DIM_ONE_SRC))
+ return 0;
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -EINVAL;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_ETHER] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_mac_type __read_mostly = {
+ .name = "hash:mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_mac_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_mac_init(void)
+{
+ return ip_set_type_register(&hash_mac_type);
+}
+
+static void __exit
+hash_mac_fini(void)
+{
+ ip_set_type_unregister(&hash_mac_type);
+}
+
+module_init(hash_mac_init);
+module_exit(hash_mac_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 4c7d495783a3..6b3ac10ac2f1 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -27,7 +27,8 @@
/* 2 nomatch flag support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -150,7 +151,10 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -318,7 +322,10 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -377,6 +384,9 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index db2606805b35..35dd35873442 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -28,7 +28,8 @@
/* 2 /0 support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -236,7 +237,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
if (!nf_bridge)
@@ -281,7 +282,10 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -470,7 +474,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
ip6_netmask(&e.ip, e.cidr);
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
if (!nf_bridge)
@@ -514,7 +518,10 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -590,6 +597,9 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 3e99987e4bf2..da00284b3571 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -24,7 +24,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -171,7 +172,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -203,7 +207,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
flags |= (IPSET_FLAG_NOMATCH << 16);
}
- if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] &&
+ if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_IP2_TO])) {
e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
@@ -219,9 +223,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
if (ip_to < ip)
swap(ip, ip_to);
- if (ip + UINT_MAX == ip_to)
+ if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- }
+ } else
+ ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
@@ -230,10 +235,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
if (ip2_to < ip2_from)
swap(ip2_from, ip2_to);
- if (ip2_from + UINT_MAX == ip2_to)
+ if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
-
- }
+ } else
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
if (retried)
ip = ntohl(h->next.ip[0]);
@@ -393,7 +398,10 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -461,6 +469,9 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 1c645fbd09c7..c0ddb58d19dc 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -29,7 +29,8 @@
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -172,7 +173,10 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -389,7 +393,10 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -489,6 +496,9 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index c0d2ba73f8b2..b8053d675fc3 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -26,7 +26,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 0 Comments support added */
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -189,7 +190,10 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -257,7 +261,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- }
+ } else
+ ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
port_to = port = ntohs(e.port);
if (tb[IPSET_ATTR_PORT_TO]) {
@@ -275,7 +280,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
- }
+ } else
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
if (retried)
ip = ntohl(h->next.ip[0]);
@@ -458,7 +464,10 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -567,6 +576,9 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 3e2317f3cf68..f8f682806e36 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -17,7 +17,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comments support added */
+/* 2 Comments support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -73,6 +74,10 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
ip_set_update_counter(ext_counter(e, set),
ext, &opt->ext,
cmdflags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(e, set),
+ ext, &opt->ext,
+ cmdflags);
return ret;
}
}
@@ -197,6 +202,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
return 0;
}
@@ -307,6 +314,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
/* Set is already added to the list */
ip_set_put_byindex(map->net, d->id);
return 0;
@@ -378,7 +387,10 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -597,7 +609,9 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
struct set_elem *e;
u32 i;
- map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL);
+ map = kzalloc(sizeof(*map) +
+ min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize,
+ GFP_KERNEL);
if (!map)
return false;
@@ -665,6 +679,9 @@ static struct ip_set_type list_set_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 0c3b1670b0d1..3b6929dec748 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -152,6 +152,16 @@ config IP_VS_WLC
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_FO
+ tristate "weighted failover scheduling"
+ ---help---
+ The weighted failover scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
---help---
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 34ee602ddb66..38b2723b2e3d 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 610e19c0e13f..b0f7b626b56d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/in.h>
+#include <linux/inet.h>
#include <linux/net.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -77,6 +78,13 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
+/* We need an addrstrlen that works with or without v6 */
+#ifdef CONFIG_IP_VS_IPV6
+#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
+#else
+#define IP_VS_ADDRSTRLEN (8+1)
+#endif
+
struct ip_vs_aligned_lock
{
spinlock_t l;
@@ -488,7 +496,12 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+#endif
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
@@ -514,7 +527,10 @@ static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
@@ -580,7 +596,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -616,7 +632,13 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
rcu_read_lock();
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+
+ /* This function is only invoked by the synchronization code. We do
+ * not currently support heterogeneous pools with synchronization,
+ * so we can make the assumption that the svc_af is the same as the
+ * dest_af
+ */
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
@@ -671,7 +693,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -740,7 +762,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
ntohs(ct->cport),
IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
ntohs(ct->vport),
- IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+ IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
ntohs(ct->dport));
/*
@@ -848,7 +870,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
-ip_vs_conn_new(const struct ip_vs_conn_param *p,
+ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark)
{
@@ -867,6 +889,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
+ cp->daf = dest_af;
cp->protocol = p->protocol;
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
@@ -874,7 +897,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
&cp->vaddr, p->vaddr);
cp->vport = p->vport;
- ip_vs_addr_set(p->af, &cp->daddr, daddr);
+ ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
@@ -1036,6 +1059,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
+ char dbuf[IP_VS_ADDRSTRLEN];
if (!ip_vs_conn_net_eq(cp, net))
return 0;
@@ -1050,24 +1074,32 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
pe_data[len] = '\0';
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
- "%pI6 %04X %-11s %7lu%s\n",
+ "%s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
- " %08X %04X %-11s %7lu%s\n",
+ " %s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
@@ -1105,6 +1137,7 @@ static const char *ip_vs_origin_name(unsigned int flags)
static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
{
+ char dbuf[IP_VS_ADDRSTRLEN];
if (v == SEQ_START_TOKEN)
seq_puts(seq,
@@ -1117,12 +1150,21 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
return 0;
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
- seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
+ seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
@@ -1130,11 +1172,11 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
- "%08X %04X %-11s %-6s %7lu\n",
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e6836755c45d..990decba1fe4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -328,7 +328,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* This adds param.pe_data to the template,
* and thus param.pe_data will be destroyed
* when the template expires */
- ct = ip_vs_conn_new(&param, &dest->addr, dport,
+ ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
@@ -357,7 +357,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
src_port, &iph->daddr, dst_port, &param);
- cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
+ cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
+ skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
*ignored = -1;
@@ -479,7 +480,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0], &iph->daddr,
pptr[1], &p);
- cp = ip_vs_conn_new(&p, &dest->addr,
+ cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
if (!cp) {
@@ -491,9 +492,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
- IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
- IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
@@ -550,7 +551,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
- cp = ip_vs_conn_new(&p, &daddr, 0,
+ cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
NULL, skb->mark);
if (!cp)
@@ -1906,7 +1907,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
{
.hook = ip_vs_local_reply6,
.owner = THIS_MODULE,
- .pf = NFPROTO_IPV4,
+ .pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
},
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 581a6584ed0c..ac7ba689efe7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -574,8 +574,8 @@ bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
* Called under RCU lock.
*/
static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
@@ -583,9 +583,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* Find the destination for the given service
*/
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
- if ((dest->af == svc->af)
- && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
- && (dest->port == dport)) {
+ if ((dest->af == dest_af) &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
+ (dest->port == dport)) {
/* HIT */
return dest;
}
@@ -602,7 +602,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* on the backup.
* Called under RCU lock, no refcnt is returned.
*/
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
+struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
@@ -613,14 +613,14 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
port = 0;
- dest = ip_vs_lookup_dest(svc, daddr, port);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
if (!dest)
- dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
return dest;
}
@@ -657,8 +657,8 @@ static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
* scheduling.
*/
static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
@@ -671,11 +671,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- if (dest->af == svc->af &&
- ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+ if (dest->af == dest_af &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
dest->vfwmark == svc->fwmark &&
dest->protocol == svc->protocol &&
@@ -779,6 +779,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_scheduler *sched;
int conn_flags;
+ /* We cannot modify an address and change the address family */
+ BUG_ON(!add && udest->af != dest->af);
+
+ if (add && udest->af != svc->af)
+ ipvs->mixed_address_family_dests++;
+
/* set the weight and the flags */
atomic_set(&dest->weight, udest->weight);
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
@@ -816,6 +822,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->u_threshold = udest->u_threshold;
dest->l_threshold = udest->l_threshold;
+ dest->af = udest->af;
+
spin_lock_bh(&dest->dst_lock);
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
@@ -847,7 +855,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
EnterFunction(2);
#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6) {
+ if (udest->af == AF_INET6) {
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
@@ -875,12 +883,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
u64_stats_init(&ip_vs_dest_stats->syncp);
}
- dest->af = svc->af;
+ dest->af = udest->af;
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
dest->vport = svc->port;
dest->vfwmark = svc->fwmark;
- ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
dest->port = udest->port;
atomic_set(&dest->activeconns, 0);
@@ -928,11 +936,11 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest != NULL) {
@@ -944,12 +952,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
* Check if the dest already exists in the trash and
* is from the same service
*/
- dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+ dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
if (dest != NULL) {
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
- IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+ IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
@@ -992,11 +1000,11 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest == NULL) {
@@ -1055,6 +1063,9 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
list_del_rcu(&dest->n_list);
svc->num_dests--;
+ if (dest->af != svc->af)
+ net_ipvs(svc->net)->mixed_address_family_dests--;
+
if (svcupd) {
struct ip_vs_scheduler *sched;
@@ -1078,7 +1089,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
rcu_read_unlock();
if (dest == NULL) {
@@ -1807,92 +1818,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec,
},
#endif
-#if 0
- {
- .procname = "timeout_established",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_synsent",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_synrecv",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_finwait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_timewait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_close",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_closewait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_lastack",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_listen",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_synack",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_udp",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_icmp",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
-#endif
{ }
};
@@ -2265,29 +2190,41 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
return 0;
}
+#define CMDID(cmd) (cmd - IP_VS_BASE_CTL)
+
+struct ip_vs_svcdest_user {
+ struct ip_vs_service_user s;
+ struct ip_vs_dest_user d;
+};
-#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
-#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
-#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
- sizeof(struct ip_vs_dest_user))
-#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
-#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
-#define MAX_ARG_LEN SVCDEST_ARG_LEN
-
-static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
- [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
- [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
+static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
+ [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
+ [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
+ [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user),
+ [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user),
};
+union ip_vs_set_arglen {
+ struct ip_vs_service_user field_IP_VS_SO_SET_ADD;
+ struct ip_vs_service_user field_IP_VS_SO_SET_EDIT;
+ struct ip_vs_service_user field_IP_VS_SO_SET_DEL;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST;
+ struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT;
+ struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON;
+ struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON;
+ struct ip_vs_service_user field_IP_VS_SO_SET_ZERO;
+};
+
+#define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen)
+
static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
struct ip_vs_service_user *usvc_compat)
{
@@ -2318,6 +2255,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
udest->weight = udest_compat->weight;
udest->u_threshold = udest_compat->u_threshold;
udest->l_threshold = udest_compat->l_threshold;
+ udest->af = AF_INET;
}
static int
@@ -2325,7 +2263,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
struct net *net = sock_net(sk);
int ret;
- unsigned char arg[MAX_ARG_LEN];
+ unsigned char arg[MAX_SET_ARGLEN];
struct ip_vs_service_user *usvc_compat;
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
@@ -2333,16 +2271,15 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
struct ip_vs_dest_user_kern udest;
struct netns_ipvs *ipvs = net_ipvs(net);
+ BUILD_BUG_ON(sizeof(arg) > 255);
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
return -EINVAL;
- if (len < 0 || len > MAX_ARG_LEN)
- return -EINVAL;
- if (len != set_arglen[SET_CMDID(cmd)]) {
- pr_err("set_ctl: len %u != %u\n",
- len, set_arglen[SET_CMDID(cmd)]);
+ if (len != set_arglen[CMDID(cmd)]) {
+ IP_VS_DBG(1, "set_ctl: len %u != %u\n",
+ len, set_arglen[CMDID(cmd)]);
return -EINVAL;
}
@@ -2357,10 +2294,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
- ret = -ERESTARTSYS;
- goto out_dec;
- }
+ mutex_lock(&ipvs->sync_mutex);
if (cmd == IP_VS_SO_SET_STARTDAEMON)
ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
dm->syncid);
@@ -2370,11 +2304,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_dec;
}
- if (mutex_lock_interruptible(&__ip_vs_mutex)) {
- ret = -ERESTARTSYS;
- goto out_dec;
- }
-
+ mutex_lock(&__ip_vs_mutex);
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
ret = ip_vs_flush(net, false);
@@ -2562,6 +2492,12 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
if (count >= get->num_dests)
break;
+ /* Cannot expose heterogeneous members via sockopt
+ * interface
+ */
+ if (dest->af != svc->af)
+ continue;
+
entry.addr = dest->addr.ip;
entry.port = dest->port;
entry.conn_flags = atomic_read(&dest->conn_flags);
@@ -2605,51 +2541,51 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
#endif
}
+static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
+ [CMDID(IP_VS_SO_GET_VERSION)] = 64,
+ [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo),
+ [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
+ [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry),
+ [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests),
+ [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
+ [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user),
+};
-#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
-#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
-#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
-#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
-#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
-#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
-#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
-
-static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
- [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
- [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
+union ip_vs_get_arglen {
+ char field_IP_VS_SO_GET_VERSION[64];
+ struct ip_vs_getinfo field_IP_VS_SO_GET_INFO;
+ struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES;
+ struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE;
+ struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS;
+ struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT;
+ struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2];
};
+#define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen)
+
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
- unsigned char arg[128];
+ unsigned char arg[MAX_GET_ARGLEN];
int ret = 0;
unsigned int copylen;
struct net *net = sock_net(sk);
struct netns_ipvs *ipvs = net_ipvs(net);
BUG_ON(!net);
+ BUILD_BUG_ON(sizeof(arg) > 255);
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
return -EINVAL;
- if (*len < get_arglen[GET_CMDID(cmd)]) {
- pr_err("get_ctl: len %u < %u\n",
- *len, get_arglen[GET_CMDID(cmd)]);
+ copylen = get_arglen[CMDID(cmd)];
+ if (*len < (int) copylen) {
+ IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
return -EINVAL;
}
- copylen = get_arglen[GET_CMDID(cmd)];
- if (copylen > 128)
- return -EINVAL;
-
if (copy_from_user(arg, user, copylen) != 0)
return -EFAULT;
/*
@@ -2659,9 +2595,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct ip_vs_daemon_user d[2];
memset(&d, 0, sizeof(d));
- if (mutex_lock_interruptible(&ipvs->sync_mutex))
- return -ERESTARTSYS;
-
+ mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
@@ -2680,9 +2614,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
- if (mutex_lock_interruptible(&__ip_vs_mutex))
- return -ERESTARTSYS;
-
+ mutex_lock(&__ip_vs_mutex);
switch (cmd) {
case IP_VS_SO_GET_VERSION:
{
@@ -2863,6 +2795,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
+ [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3118,7 +3051,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
atomic_read(&dest->inactconns)) ||
nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
- atomic_read(&dest->persistconns)))
+ atomic_read(&dest->persistconns)) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
goto nla_put_failure;
@@ -3199,6 +3133,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
struct nlattr *nla_addr, *nla_port;
+ struct nlattr *nla_addr_family;
/* Parse mandatory identifying destination fields first */
if (nla == NULL ||
@@ -3207,6 +3142,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
nla_port = attrs[IPVS_DEST_ATTR_PORT];
+ nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
if (!(nla_addr && nla_port))
return -EINVAL;
@@ -3216,6 +3152,11 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_be16(nla_port);
+ if (nla_addr_family)
+ udest->af = nla_get_u16(nla_addr_family);
+ else
+ udest->af = 0;
+
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
@@ -3320,6 +3261,12 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
+ /* The synchronization protocol is incompatible with mixed family
+ * services
+ */
+ if (net_ipvs(net)->mixed_address_family_dests > 0)
+ return -EINVAL;
+
return start_sync_thread(net,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
@@ -3443,6 +3390,35 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
need_full_dest);
if (ret)
goto out;
+
+ /* Old protocols did not allow the user to specify address
+ * family, so we set it to zero instead. We also didn't
+ * allow heterogeneous pools in the old code, so it's safe
+ * to assume that this will have the same address family as
+ * the service.
+ */
+ if (udest.af == 0)
+ udest.af = svc->af;
+
+ if (udest.af != svc->af) {
+ /* The synchronization protocol is incompatible
+ * with mixed family services
+ */
+ if (net_ipvs(net)->sync_state) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Which connection types do we support? */
+ switch (udest.conn_flags) {
+ case IP_VS_CONN_F_TUNNEL:
+ /* We are able to forward this */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ }
}
switch (cmd) {
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index c3b84546ea9e..6be5c538b71e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -234,7 +234,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
new file mode 100644
index 000000000000..e09874d02938
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -0,0 +1,79 @@
+/*
+ * IPVS: Weighted Fail Over module
+ *
+ * Authors: Kenny Mathis <kmathis@chokepoint.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Kenny Mathis : added initial functionality based on weight
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* Weighted Fail Over Module */
+static struct ip_vs_dest *
+ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *hweight = NULL;
+ int hw = 0; /* Track highest weight */
+
+ IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n");
+
+ /* Basic failover functionality
+ * Find virtual server with highest weight and send it traffic
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > hw) {
+ hweight = dest;
+ hw = atomic_read(&dest->weight);
+ }
+ }
+
+ if (hweight) {
+ IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n",
+ IP_VS_DBG_ADDR(hweight->af, &hweight->addr),
+ ntohs(hweight->port),
+ atomic_read(&hweight->activeconns),
+ atomic_read(&hweight->weight));
+ return hweight;
+ }
+
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+}
+
+static struct ip_vs_scheduler ip_vs_fo_scheduler = {
+ .name = "fo",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list),
+ .schedule = ip_vs_fo_schedule,
+};
+
+static int __init ip_vs_fo_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_fo_scheduler);
+}
+
+static void __exit ip_vs_fo_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_fo_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_fo_init);
+module_exit(ip_vs_fo_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 77c173282f38..a64fa15790e5 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -233,7 +233,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
ip_vs_conn_fill_param(ip_vs_conn_net(cp),
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
- n_cp = ip_vs_conn_new(&p, &from, port,
+ /* As above, this is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
cp->dest, skb->mark);
@@ -396,7 +397,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
- n_cp = ip_vs_conn_new(&p, &cp->daddr,
+ /* This is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr,
htons(ntohs(cp->dport)-1),
IP_VS_CONN_F_NFCT, cp->dest,
skb->mark);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 547ff33c1efd..127f14046c51 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -199,11 +199,11 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
- en = ip_vs_lblc_get(dest->af, tbl, daddr);
+ en = ip_vs_lblc_get(af, tbl, daddr);
if (en) {
if (en->dest == dest)
return en;
@@ -213,8 +213,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
ip_vs_dest_hold(dest);
@@ -521,13 +521,13 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblc_new(tbl, &iph->daddr, dest);
+ ip_vs_lblc_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 3f21a2f47de1..2229d2d8bbe0 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -362,18 +362,18 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblcr_entry *en;
- en = ip_vs_lblcr_get(dest->af, tbl, daddr);
+ en = ip_vs_lblcr_get(af, tbl, daddr);
if (!en) {
en = kmalloc(sizeof(*en), GFP_ATOMIC);
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
/* initialize its dest set */
@@ -706,13 +706,13 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblcr_new(tbl, &iph->daddr, dest);
+ ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 2bdcb1cf2127..19a0769a989a 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -59,7 +59,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
else
IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
"inactconns %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->inactconns));
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 961a6de9bb29..a8b63401e773 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -107,7 +107,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
out:
IP_VS_DBG_BUF(6, "NQ: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 2f7ea7564044..5b84c0b56642 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -432,7 +432,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index e3a697234a98..8e92beb0cca9 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -510,7 +510,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
th->fin ? 'F' : '.',
th->ack ? 'A' : '.',
th->rst ? 'R' : '.',
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 176b87c35e34..58bacfc461ee 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -95,7 +95,7 @@ stop:
spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt), atomic_read(&dest->weight));
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index e446b9fa7424..f8e2d00f528b 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -108,7 +108,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "SED: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index cc65b2f42cd4..98a13433b68c 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -138,7 +138,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
/* if the original dest is unavailable, loop around the table
* starting from ihash to find a new dest
@@ -153,7 +153,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable "
"server %s:%d (offset %d), reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port), roffset);
}
@@ -192,7 +192,7 @@ ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
RCU_INIT_POINTER(b->dest, dest);
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
- i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ i, IP_VS_DBG_ADDR(dest->af, &dest->addr),
atomic_read(&dest->weight));
/* Don't move to next dest until filling weight */
@@ -342,7 +342,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->saddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index db801263ee9f..7162c86fd50d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -880,14 +880,20 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* but still handled.
*/
rcu_read_lock();
- dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
- param->vport, protocol, fwmark, flags);
+ /* This function is only invoked by the synchronization
+ * code. We do not currently support heterogeneous pools
+ * with synchronization, so we can make the assumption that
+ * the svc_af is the same as the dest_af
+ */
+ dest = ip_vs_find_dest(net, type, type, daddr, dport,
+ param->vaddr, param->vport, protocol,
+ fwmark, flags);
- cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+ cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
+ fwmark);
rcu_read_unlock();
if (!cp) {
- if (param->pe_data)
- kfree(param->pe_data);
+ kfree(param->pe_data);
IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
return;
}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index b5b4650d50a9..6b366fd90554 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -80,7 +80,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "WLC: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 0546cd572d6b..17e6d4406ca7 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -216,7 +216,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
found:
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 73ba1cc7a88d..91f17c1eb8a2 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -38,6 +38,7 @@
#include <net/route.h> /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/ip_tunnels.h>
#include <net/addrconf.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
@@ -156,18 +157,113 @@ retry:
return rt;
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
+{
+ return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
+}
+#endif
+
+static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
+ int rt_mode,
+ bool new_rt_is_local)
+{
+ bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+ bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+ bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
+ bool source_is_loopback;
+ bool old_rt_is_local;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+
+ source_is_loopback =
+ (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+ (addr_type & IPV6_ADDR_LOOPBACK);
+ old_rt_is_local = __ip_vs_is_local_route6(
+ (struct rt6_info *)skb_dst(skb));
+ } else
+#endif
+ {
+ source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr);
+ old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+ }
+
+ if (unlikely(new_rt_is_local)) {
+ if (!rt_mode_allow_local)
+ return true;
+ if (!rt_mode_allow_redirect && !old_rt_is_local)
+ return true;
+ } else {
+ if (!rt_mode_allow_non_local)
+ return true;
+ if (source_is_loopback)
+ return true;
+ }
+ return false;
+}
+
+static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
+{
+ struct sock *sk = skb->sk;
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+}
+
+static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+ struct ip_vs_iphdr *ipvsh,
+ struct sk_buff *skb, int mtu)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP_VS_DBG(1, "frag needed for %pI6c\n",
+ &ipv6_hdr(skb)->saddr);
+ return false;
+ }
+ } else
+#endif
+ {
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
+ /* If we're going to tunnel the packet and pmtu discovery
+ * is disabled, we'll just fragment it anyway
+ */
+ if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+ return true;
+
+ if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
+ skb->len > mtu && !skb_is_gso(skb))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ IP_VS_DBG(1, "frag needed for %pI4\n",
+ &ip_hdr(skb)->saddr);
+ return false;
+ }
+ }
+
+ return true;
+}
+
/* Get route to destination or remote server */
static int
-__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
- __be32 daddr, int rt_mode, __be32 *ret_saddr)
+__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+ __be32 daddr, int rt_mode, __be32 *ret_saddr,
+ struct ip_vs_iphdr *ipvsh)
{
struct net *net = dev_net(skb_dst(skb)->dev);
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
- struct rtable *ort; /* Original route */
- struct iphdr *iph;
- __be16 df;
int mtu;
int local, noref = 1;
@@ -217,30 +313,14 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
}
local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
- (rt->rt_flags & RTCF_LOCAL) ?
- "local":"non-local", &daddr);
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI4\n", &dest->addr.ip);
goto err_put;
}
- iph = ip_hdr(skb);
- if (likely(!local)) {
- if (unlikely(ipv4_is_loopback(iph->saddr))) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI4 to non-local address, dest: %pI4\n",
- &iph->saddr, &daddr);
- goto err_put;
- }
- } else {
- ort = skb_rtable(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !(ort->rt_flags & RTCF_LOCAL)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
- "local requires NAT method, dest: %pI4\n",
- &iph->daddr, &daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
ip_rt_put(rt);
@@ -249,28 +329,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
mtu = dst_mtu(&rt->dst);
- df = iph->frag_off & htons(IP_DF);
} else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
}
- ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
- /* MTU check allowed? */
- df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- /* MTU checking */
- if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -294,12 +363,6 @@ err_unreach:
}
#ifdef CONFIG_IP_VS_IPV6
-
-static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
-{
- return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
-}
-
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
struct in6_addr *ret_saddr, int do_xfrm)
@@ -338,14 +401,13 @@ out_err:
* Get route to destination or remote server
*/
static int
-__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
- struct rt6_info *ort; /* Original route */
struct dst_entry *dst;
int mtu;
int local, noref = 1;
@@ -392,32 +454,15 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
}
local = __ip_vs_is_local_route6(rt);
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
- local ? "local":"non-local", daddr);
+
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI6\n", &dest->addr.in6);
goto err_put;
}
- if (likely(!local)) {
- if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
- IPV6_ADDR_LOOPBACK)) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI6c to non-local address, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->saddr, daddr);
- goto err_put;
- }
- } else {
- ort = (struct rt6_info *) skb_dst(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !__ip_vs_is_local_route6(ort)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI6c "
- "to local requires NAT method, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->daddr, daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
dst_release(&rt->dst);
@@ -428,28 +473,17 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
mtu = dst_mtu(&rt->dst);
else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
goto err_put;
}
- ort = (struct rt6_info *) skb_dst(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
- if (!skb->dev)
- skb->dev = net->loopback_dev;
- /* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -555,8 +589,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
- NULL) < 0)
+ if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+ IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
ip_send_check(iph);
@@ -585,7 +619,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+ if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
@@ -632,10 +666,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
was_input = rt_is_input_route(skb_rtable(skb));
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR, NULL);
+ IP_VS_RT_MODE_RDR, NULL, ipvsh);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
@@ -720,8 +754,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR);
@@ -790,6 +824,81 @@ tx_error:
}
#endif
+/* When forwarding a packet, we must ensure that we've got enough headroom
+ * for the encapsulation packet in the skb. This also gives us an
+ * opportunity to figure out what the payload_len, dsfield, ttl, and df
+ * values should be, so that we won't need to look at the old ip header
+ * again
+ */
+static struct sk_buff *
+ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
+ unsigned int max_headroom, __u8 *next_protocol,
+ __u32 *payload_len, __u8 *dsfield, __u8 *ttl,
+ __be16 *df)
+{
+ struct sk_buff *new_skb = NULL;
+ struct iphdr *old_iph = NULL;
+#ifdef CONFIG_IP_VS_IPV6
+ struct ipv6hdr *old_ipv6h = NULL;
+#endif
+
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
+ new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb)
+ goto error;
+ consume_skb(skb);
+ skb = new_skb;
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ old_ipv6h = ipv6_hdr(skb);
+ *next_protocol = IPPROTO_IPV6;
+ if (payload_len)
+ *payload_len =
+ ntohs(old_ipv6h->payload_len) +
+ sizeof(*old_ipv6h);
+ *dsfield = ipv6_get_dsfield(old_ipv6h);
+ *ttl = old_ipv6h->hop_limit;
+ if (df)
+ *df = 0;
+ } else
+#endif
+ {
+ old_iph = ip_hdr(skb);
+ /* Copy DF, reset fragment offset and MF */
+ if (df)
+ *df = (old_iph->frag_off & htons(IP_DF));
+ *next_protocol = IPPROTO_IPIP;
+
+ /* fix old IP header checksum */
+ ip_send_check(old_iph);
+ *dsfield = ipv4_get_dsfield(old_iph);
+ *ttl = old_iph->ttl;
+ if (payload_len)
+ *payload_len = ntohs(old_iph->tot_len);
+ }
+
+ return skb;
+error:
+ kfree_skb(skb);
+ return ERR_PTR(-ENOMEM);
+}
+
+static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
+{
+ if (encaps_af == AF_INET) {
+ if (orig_af == AF_INET)
+ return SKB_GSO_IPIP;
+
+ return SKB_GSO_SIT;
+ }
+
+ /* GSO: we need to provide proper SKB_GSO_ value for IPv6:
+ * SKB_GSO_SIT/IPV6
+ */
+ return 0;
+}
/*
* IP Tunneling transmitter
@@ -818,9 +927,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
- u8 tos = old_iph->tos;
- __be16 df;
+ __u8 next_protocol = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
+ __be16 df = 0;
+ __be16 *dfp = NULL;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
@@ -828,11 +939,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
- IP_VS_RT_MODE_TUNNEL, &saddr);
+ IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -843,30 +954,26 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt = skb_rtable(skb);
tdev = rt->dst.dev;
- /* Copy DF, reset fragment offset and MF */
- df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
+ /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
+ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, NULL, &dsfield,
+ &ttl, dfp);
+ if (IS_ERR(skb))
+ goto tx_error;
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ip_hdr(skb);
- }
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET, cp->af));
+ if (IS_ERR(skb))
+ goto tx_error;
skb->transport_header = skb->network_header;
- /* fix old IP header checksum */
- ip_send_check(old_iph);
-
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -878,11 +985,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
- iph->protocol = IPPROTO_IPIP;
- iph->tos = tos;
+ iph->protocol = next_protocol;
+ iph->tos = dsfield;
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
- iph->ttl = old_iph->ttl;
+ iph->ttl = ttl;
ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
@@ -900,7 +1007,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
tx_error:
- kfree_skb(skb);
+ if (!IS_ERR(skb))
+ kfree_skb(skb);
rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
@@ -914,7 +1022,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rt6_info *rt; /* Route to the other host */
struct in6_addr saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct ipv6hdr *old_iph = ipv6_hdr(skb);
+ __u8 next_protocol = 0;
+ __u32 payload_len = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
@@ -922,7 +1033,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -942,16 +1053,16 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, &payload_len,
+ &dsfield, &ttl, NULL);
+ if (IS_ERR(skb))
+ goto tx_error;
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ipv6_hdr(skb);
- }
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
+ if (IS_ERR(skb))
+ goto tx_error;
skb->transport_header = skb->network_header;
@@ -964,14 +1075,13 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
iph = ipv6_hdr(skb);
iph->version = 6;
- iph->nexthdr = IPPROTO_IPV6;
- iph->payload_len = old_iph->payload_len;
- be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
- iph->priority = old_iph->priority;
+ iph->nexthdr = next_protocol;
+ iph->payload_len = htons(payload_len);
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+ ipv6_change_dsfield(iph, 0, dsfield);
iph->daddr = cp->daddr.in6;
iph->saddr = saddr;
- iph->hop_limit = old_iph->hop_limit;
+ iph->hop_limit = ttl;
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
@@ -988,7 +1098,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
tx_error:
- kfree_skb(skb);
+ if (!IS_ERR(skb))
+ kfree_skb(skb);
rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
@@ -1009,10 +1120,10 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_KNOWN_NH, NULL);
+ IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -1048,8 +1159,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL);
if (local < 0)
@@ -1116,7 +1227,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ NULL, iph);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
@@ -1207,8 +1319,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0, rt_mode);
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
rt = (struct rt6_info *) skb_dst(skb);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1f4f954c4b47..5016a6929085 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -142,7 +142,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
static u32 __hash_bucket(u32 hash, unsigned int size)
{
- return ((u64)hash * size) >> 32;
+ return reciprocal_scale(hash, size);
}
static u32 hash_bucket(u32 hash, const struct net *net)
@@ -352,57 +352,28 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
local_bh_enable();
}
-static void death_by_event(unsigned long ul_conntrack)
-{
- struct nf_conn *ct = (void *)ul_conntrack;
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
-
- BUG_ON(ecache == NULL);
-
- if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
- /* bad luck, let's retry again */
- ecache->timeout.expires = jiffies +
- (prandom_u32() % net->ct.sysctl_events_retry_timeout);
- add_timer(&ecache->timeout);
- return;
- }
- /* we've got the event delivered, now it's dying */
- set_bit(IPS_DYING_BIT, &ct->status);
- nf_ct_put(ct);
-}
-
-static void nf_ct_dying_timeout(struct nf_conn *ct)
-{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
-
- BUG_ON(ecache == NULL);
-
- /* set a new timer to retry event delivery */
- setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
- ecache->timeout.expires = jiffies +
- (prandom_u32() % net->ct.sysctl_events_retry_timeout);
- add_timer(&ecache->timeout);
-}
-
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
- tstamp->stop = ktime_to_ns(ktime_get_real());
+ tstamp->stop = ktime_get_real_ns();
+
+ if (nf_ct_is_dying(ct))
+ goto delete;
- if (!nf_ct_is_dying(ct) &&
- unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
- portid, report) < 0)) {
+ if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+ portid, report) < 0) {
/* destroy event was not delivered */
nf_ct_delete_from_lists(ct);
- nf_ct_dying_timeout(ct);
+ nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
return false;
}
+
+ nf_conntrack_ecache_work(nf_ct_net(ct));
set_bit(IPS_DYING_BIT, &ct->status);
+ delete:
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
@@ -1464,26 +1435,6 @@ void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
-static void nf_ct_release_dying_list(struct net *net)
-{
- struct nf_conntrack_tuple_hash *h;
- struct nf_conn *ct;
- struct hlist_nulls_node *n;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- /* never fails to remove them, no listeners at this point */
- nf_ct_kill(ct);
- }
- spin_unlock_bh(&pcpu->lock);
- }
-}
-
static int untrack_refs(void)
{
int cnt = 0, cpu;
@@ -1548,7 +1499,6 @@ i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
- nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 1df176146567..4e78c57b818f 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -29,6 +29,90 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
+#define ECACHE_RETRY_WAIT (HZ/10)
+
+enum retry_state {
+ STATE_CONGESTED,
+ STATE_RESTART,
+ STATE_DONE,
+};
+
+static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
+{
+ struct nf_conn *refs[16];
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ unsigned int evicted = 0;
+ enum retry_state ret = STATE_DONE;
+
+ spin_lock(&pcpu->lock);
+
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ if (nf_ct_is_dying(ct))
+ continue;
+
+ if (nf_conntrack_event(IPCT_DESTROY, ct)) {
+ ret = STATE_CONGESTED;
+ break;
+ }
+
+ /* we've got the event delivered, now it's dying */
+ set_bit(IPS_DYING_BIT, &ct->status);
+ refs[evicted] = ct;
+
+ if (++evicted >= ARRAY_SIZE(refs)) {
+ ret = STATE_RESTART;
+ break;
+ }
+ }
+
+ spin_unlock(&pcpu->lock);
+
+ /* can't _put while holding lock */
+ while (evicted)
+ nf_ct_put(refs[--evicted]);
+
+ return ret;
+}
+
+static void ecache_work(struct work_struct *work)
+{
+ struct netns_ct *ctnet =
+ container_of(work, struct netns_ct, ecache_dwork.work);
+ int cpu, delay = -1;
+ struct ct_pcpu *pcpu;
+
+ local_bh_disable();
+
+ for_each_possible_cpu(cpu) {
+ enum retry_state ret;
+
+ pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);
+
+ ret = ecache_work_evict_list(pcpu);
+
+ switch (ret) {
+ case STATE_CONGESTED:
+ delay = ECACHE_RETRY_WAIT;
+ goto out;
+ case STATE_RESTART:
+ delay = 0;
+ break;
+ case STATE_DONE:
+ break;
+ }
+ }
+
+ out:
+ local_bh_enable();
+
+ ctnet->ecache_dwork_pending = delay > 0;
+ if (delay >= 0)
+ schedule_delayed_work(&ctnet->ecache_dwork, delay);
+}
+
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
@@ -157,7 +241,6 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
-static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
#ifdef CONFIG_SYSCTL
static struct ctl_table event_sysctl_table[] = {
@@ -168,13 +251,6 @@ static struct ctl_table event_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "nf_conntrack_events_retry_timeout",
- .data = &init_net.ct.sysctl_events_retry_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
{}
};
#endif /* CONFIG_SYSCTL */
@@ -196,7 +272,6 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
goto out;
table[0].data = &net->ct.sysctl_events;
- table[1].data = &net->ct.sysctl_events_retry_timeout;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -238,12 +313,13 @@ static void nf_conntrack_event_fini_sysctl(struct net *net)
int nf_conntrack_ecache_pernet_init(struct net *net)
{
net->ct.sysctl_events = nf_ct_events;
- net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
+ INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
return nf_conntrack_event_init_sysctl(net);
}
void nf_conntrack_ecache_pernet_fini(struct net *net)
{
+ cancel_delayed_work_sync(&net->ct.ecache_dwork);
nf_conntrack_event_fini_sysctl(net);
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index f87e8f68ad45..91a1837acd0e 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -83,7 +83,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
(((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
(__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
- return ((u64)hash * nf_ct_expect_hsize) >> 32;
+
+ return reciprocal_scale(hash, nf_ct_expect_hsize);
}
struct nf_conntrack_expect *
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 300ed1eec729..1bd9ed9e62f6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -745,8 +745,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
{
if (cb->args[1])
nf_ct_put((struct nf_conn *)cb->args[1]);
- if (cb->data)
- kfree(cb->data);
+ kfree(cb->data);
return 0;
}
@@ -1738,7 +1737,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
}
tstamp = nf_conn_tstamp_find(ct);
if (tstamp)
- tstamp->start = ktime_to_ns(ktime_get_real());
+ tstamp->start = ktime_get_real_ns();
err = nf_conntrack_hash_check_insert(ct);
if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d25f29377648..957c1db66652 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -14,6 +14,30 @@
static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+static bool nf_generic_should_process(u8 proto)
+{
+ switch (proto) {
+#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
+ case IPPROTO_SCTP:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
+ case IPPROTO_DCCP:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
+ case IPPROTO_GRE:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
+ case IPPROTO_UDPLITE:
+ return false;
+#endif
+ default:
+ return true;
+ }
+}
+
static inline struct nf_generic_net *generic_pernet(struct net *net)
{
return &net->ct.nf_ct_proto.generic;
@@ -67,7 +91,7 @@ static int generic_packet(struct nf_conn *ct,
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
- return true;
+ return nf_generic_should_process(nf_ct_protonum(ct));
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index f641751dba9d..cf65a1e040dd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -101,7 +101,7 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
{
struct ct_iter_state *st = seq->private;
- st->time_now = ktime_to_ns(ktime_get_real());
+ st->time_now = ktime_get_real_ns();
rcu_read_lock();
return ct_get_idx(seq, *pos);
}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 85296d4eac0e..daad6022c689 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,16 +16,22 @@
#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
-static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
+static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
static DEFINE_MUTEX(nf_log_mutex);
static struct nf_logger *__find_logger(int pf, const char *str_logger)
{
- struct nf_logger *t;
+ struct nf_logger *log;
+ int i;
+
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (loggers[pf][i] == NULL)
+ continue;
- list_for_each_entry(t, &nf_loggers_l[pf], list[pf]) {
- if (!strnicmp(str_logger, t->name, strlen(t->name)))
- return t;
+ log = rcu_dereference_protected(loggers[pf][i],
+ lockdep_is_held(&nf_log_mutex));
+ if (!strnicmp(str_logger, log->name, strlen(log->name)))
+ return log;
}
return NULL;
@@ -73,17 +79,14 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(logger->list); i++)
- INIT_LIST_HEAD(&logger->list[i]);
-
mutex_lock(&nf_log_mutex);
if (pf == NFPROTO_UNSPEC) {
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
- list_add_tail(&(logger->list[i]), &(nf_loggers_l[i]));
+ rcu_assign_pointer(loggers[i][logger->type], logger);
} else {
/* register at end of list to honor first register win */
- list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
+ rcu_assign_pointer(loggers[pf][logger->type], logger);
}
mutex_unlock(&nf_log_mutex);
@@ -98,7 +101,7 @@ void nf_log_unregister(struct nf_logger *logger)
mutex_lock(&nf_log_mutex);
for (i = 0; i < NFPROTO_NUMPROTO; i++)
- list_del(&logger->list[i]);
+ RCU_INIT_POINTER(loggers[i][logger->type], NULL);
mutex_unlock(&nf_log_mutex);
}
EXPORT_SYMBOL(nf_log_unregister);
@@ -129,6 +132,48 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf)
}
EXPORT_SYMBOL(nf_log_unbind_pf);
+void nf_logger_request_module(int pf, enum nf_log_type type)
+{
+ if (loggers[pf][type] == NULL)
+ request_module("nf-logger-%u-%u", pf, type);
+}
+EXPORT_SYMBOL_GPL(nf_logger_request_module);
+
+int nf_logger_find_get(int pf, enum nf_log_type type)
+{
+ struct nf_logger *logger;
+ int ret = -ENOENT;
+
+ logger = loggers[pf][type];
+ if (logger == NULL)
+ request_module("nf-logger-%u-%u", pf, type);
+
+ rcu_read_lock();
+ logger = rcu_dereference(loggers[pf][type]);
+ if (logger == NULL)
+ goto out;
+
+ if (logger && try_module_get(logger->me))
+ ret = 0;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_logger_find_get);
+
+void nf_logger_put(int pf, enum nf_log_type type)
+{
+ struct nf_logger *logger;
+
+ BUG_ON(loggers[pf][type] == NULL);
+
+ rcu_read_lock();
+ logger = rcu_dereference(loggers[pf][type]);
+ module_put(logger->me);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_logger_put);
+
void nf_log_packet(struct net *net,
u_int8_t pf,
unsigned int hooknum,
@@ -143,7 +188,11 @@ void nf_log_packet(struct net *net,
const struct nf_logger *logger;
rcu_read_lock();
- logger = rcu_dereference(net->nf.nf_loggers[pf]);
+ if (loginfo != NULL)
+ logger = rcu_dereference(loggers[pf][loginfo->type]);
+ else
+ logger = rcu_dereference(net->nf.nf_loggers[pf]);
+
if (logger) {
va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args);
@@ -154,6 +203,63 @@ void nf_log_packet(struct net *net,
}
EXPORT_SYMBOL(nf_log_packet);
+#define S_SIZE (1024 - (sizeof(unsigned int) + 1))
+
+struct nf_log_buf {
+ unsigned int count;
+ char buf[S_SIZE + 1];
+};
+static struct nf_log_buf emergency, *emergency_ptr = &emergency;
+
+__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...)
+{
+ va_list args;
+ int len;
+
+ if (likely(m->count < S_SIZE)) {
+ va_start(args, f);
+ len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args);
+ va_end(args);
+ if (likely(m->count + len < S_SIZE)) {
+ m->count += len;
+ return 0;
+ }
+ }
+ m->count = S_SIZE;
+ printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n");
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_add);
+
+struct nf_log_buf *nf_log_buf_open(void)
+{
+ struct nf_log_buf *m = kmalloc(sizeof(*m), GFP_ATOMIC);
+
+ if (unlikely(!m)) {
+ local_bh_disable();
+ do {
+ m = xchg(&emergency_ptr, NULL);
+ } while (!m);
+ }
+ m->count = 0;
+ return m;
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_open);
+
+void nf_log_buf_close(struct nf_log_buf *m)
+{
+ m->buf[m->count] = 0;
+ printk("%s\n", m->buf);
+
+ if (likely(m != &emergency))
+ kfree(m);
+ else {
+ emergency_ptr = m;
+ local_bh_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_close);
+
#ifdef CONFIG_PROC_FS
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
@@ -188,8 +294,7 @@ static int seq_show(struct seq_file *s, void *v)
{
loff_t *pos = v;
const struct nf_logger *logger;
- struct nf_logger *t;
- int ret;
+ int i, ret;
struct net *net = seq_file_net(s);
logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
@@ -203,11 +308,16 @@ static int seq_show(struct seq_file *s, void *v)
if (ret < 0)
return ret;
- list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) {
- ret = seq_printf(s, "%s", t->name);
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (loggers[*pos][i] == NULL)
+ continue;
+
+ logger = rcu_dereference_protected(loggers[*pos][i],
+ lockdep_is_held(&nf_log_mutex));
+ ret = seq_printf(s, "%s", logger->name);
if (ret < 0)
return ret;
- if (&t->list[*pos] != nf_loggers_l[*pos].prev) {
+ if (i == 0 && loggers[*pos][i + 1] != NULL) {
ret = seq_printf(s, ",");
if (ret < 0)
return ret;
@@ -389,14 +499,5 @@ static struct pernet_operations nf_log_net_ops = {
int __init netfilter_log_init(void)
{
- int i, ret;
-
- ret = register_pernet_subsys(&nf_log_net_ops);
- if (ret < 0)
- return ret;
-
- for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
- INIT_LIST_HEAD(&(nf_loggers_l[i]));
-
- return 0;
+ return register_pernet_subsys(&nf_log_net_ops);
}
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
new file mode 100644
index 000000000000..a2233e77cf39
--- /dev/null
+++ b/net/netfilter/nf_log_common.c
@@ -0,0 +1,187 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
+ u8 proto, int fragment, unsigned int offset)
+{
+ struct udphdr _udph;
+ const struct udphdr *uh;
+
+ if (proto == IPPROTO_UDP)
+ /* Max length: 10 "PROTO=UDP " */
+ nf_log_buf_add(m, "PROTO=UDP ");
+ else /* Max length: 14 "PROTO=UDPLITE " */
+ nf_log_buf_add(m, "PROTO=UDPLITE ");
+
+ if (fragment)
+ goto out;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (uh == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+
+ return 1;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ",
+ ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len));
+
+out:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_udp_header);
+
+int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
+ u8 proto, int fragment, unsigned int offset,
+ unsigned int logflags)
+{
+ struct tcphdr _tcph;
+ const struct tcphdr *th;
+
+ /* Max length: 10 "PROTO=TCP " */
+ nf_log_buf_add(m, "PROTO=TCP ");
+
+ if (fragment)
+ return 0;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+ return 1;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ nf_log_buf_add(m, "SPT=%u DPT=%u ",
+ ntohs(th->source), ntohs(th->dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (logflags & XT_LOG_TCPSEQ) {
+ nf_log_buf_add(m, "SEQ=%u ACK=%u ",
+ ntohl(th->seq), ntohl(th->ack_seq));
+ }
+
+ /* Max length: 13 "WINDOW=65535 " */
+ nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window));
+ /* Max length: 9 "RES=0x3C " */
+ nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
+ TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (th->cwr)
+ nf_log_buf_add(m, "CWR ");
+ if (th->ece)
+ nf_log_buf_add(m, "ECE ");
+ if (th->urg)
+ nf_log_buf_add(m, "URG ");
+ if (th->ack)
+ nf_log_buf_add(m, "ACK ");
+ if (th->psh)
+ nf_log_buf_add(m, "PSH ");
+ if (th->rst)
+ nf_log_buf_add(m, "RST ");
+ if (th->syn)
+ nf_log_buf_add(m, "SYN ");
+ if (th->fin)
+ nf_log_buf_add(m, "FIN ");
+ /* Max length: 11 "URGP=65535 " */
+ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
+
+ if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
+ u_int8_t _opt[60 - sizeof(struct tcphdr)];
+ const u_int8_t *op;
+ unsigned int i;
+ unsigned int optsize = th->doff*4 - sizeof(struct tcphdr);
+
+ op = skb_header_pointer(skb, offset + sizeof(struct tcphdr),
+ optsize, _opt);
+ if (op == NULL) {
+ nf_log_buf_add(m, "OPT (TRUNCATED)");
+ return 1;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ nf_log_buf_add(m, "OPT (");
+ for (i = 0; i < optsize; i++)
+ nf_log_buf_add(m, "%02X", op[i]);
+
+ nf_log_buf_add(m, ") ");
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
+
+void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
+{
+ if (!sk || sk->sk_state == TCP_TIME_WAIT)
+ return;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ const struct cred *cred = sk->sk_socket->file->f_cred;
+ nf_log_buf_add(m, "UID=%u GID=%u ",
+ from_kuid_munged(&init_user_ns, cred->fsuid),
+ from_kgid_munged(&init_user_ns, cred->fsgid));
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_sk_uid_gid);
+
+void
+nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo, const char *prefix)
+{
+ nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
+ '0' + loginfo->u.log.level, prefix,
+ in ? in->name : "",
+ out ? out->name : "");
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (skb->nf_bridge) {
+ const struct net_device *physindev;
+ const struct net_device *physoutdev;
+
+ physindev = skb->nf_bridge->physindev;
+ if (physindev && in != physindev)
+ nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
+ physoutdev = skb->nf_bridge->physoutdev;
+ if (physoutdev && out != physoutdev)
+ nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
+
+static int __init nf_log_common_init(void)
+{
+ return 0;
+}
+
+static void __exit nf_log_common_exit(void) {}
+
+module_init(nf_log_common_init);
+module_exit(nf_log_common_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index a49907b1dabc..4e0b47831d43 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -126,7 +126,8 @@ hash_by_src(const struct net *net, u16 zone,
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
- return ((u64)hash * net->ct.nat_htable_size) >> 32;
+
+ return reciprocal_scale(hash, net->ct.nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -274,7 +275,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
}
var_ipp->all[i] = (__force __u32)
- htonl(minip + (((u64)j * dist) >> 32));
+ htonl(minip + reciprocal_scale(j, dist));
if (var_ipp->all[i] != range->max_addr.all[i])
full_range = true;
@@ -710,7 +711,7 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
.flags = NF_CT_EXT_F_PREALLOC,
};
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 83a72a235cae..fbce552a796e 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -95,7 +95,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
{
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index c8be2cdac0bf..b8067b53ff3a 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -78,7 +78,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.manip_pkt = dccp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = dccp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 754536f2c674..cbc7ade1487b 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -59,7 +59,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.manip_pkt = sctp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = sctp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 83ec8a6e4c36..37f5505f4529 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -79,7 +79,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_tcp = {
.manip_pkt = tcp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = tcp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index 7df613fb34a2..b0ede2f0d8bc 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -70,7 +70,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_udp = {
.manip_pkt = udp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = udp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index 776a0d1317b1..368f14e01e75 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -69,7 +69,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.manip_pkt = udplite_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = udplite_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5d24b1fdb593..4c8b68e5fa16 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -52,7 +52,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(entry->indev);
if (entry->outdev)
dev_put(entry->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
@@ -77,7 +77,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(entry->indev);
if (entry->outdev)
dev_hold(entry->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
struct net_device *physdev;
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index f042ae521557..c68c1e58b362 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -26,9 +26,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
struct nf_sockopt_ops *ops;
int ret = 0;
- if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
- return -EINTR;
-
+ mutex_lock(&nf_sockopt_mutex);
list_for_each_entry(ops, &nf_sockopts, list) {
if (ops->pf == reg->pf
&& (overlap(ops->set_optmin, ops->set_optmax,
@@ -65,9 +63,7 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
{
struct nf_sockopt_ops *ops;
- if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&nf_sockopt_mutex);
list_for_each_entry(ops, &nf_sockopts, list) {
if (ops->pf == pf) {
if (!try_module_get(ops->owner))
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8746ff9a8357..556a0dfa4abc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -127,6 +127,204 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
+static void nf_tables_unregister_hooks(const struct nft_table *table,
+ const struct nft_chain *chain,
+ unsigned int hook_nops)
+{
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN)
+ nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops);
+}
+
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE (1 << 15)
+
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWTABLE)
+ ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
+static int nft_deltable(struct nft_ctx *ctx)
+{
+ int err;
+
+ err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&ctx->table->list);
+ return err;
+}
+
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWCHAIN)
+ ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
+static int nft_delchain(struct nft_ctx *ctx)
+{
+ int err;
+
+ err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
+ if (err < 0)
+ return err;
+
+ ctx->table->use--;
+ list_del_rcu(&ctx->chain->list);
+
+ return err;
+}
+
+static inline bool
+nft_rule_is_active(struct net *net, const struct nft_rule *rule)
+{
+ return (rule->genmask & (1 << net->nft.gencursor)) == 0;
+}
+
+static inline int gencursor_next(struct net *net)
+{
+ return net->nft.gencursor+1 == 1 ? 1 : 0;
+}
+
+static inline int
+nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
+{
+ return (rule->genmask & (1 << gencursor_next(net))) == 0;
+}
+
+static inline void
+nft_rule_activate_next(struct net *net, struct nft_rule *rule)
+{
+ /* Now inactive, will be active in the future */
+ rule->genmask = (1 << net->nft.gencursor);
+}
+
+static inline void
+nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = (1 << gencursor_next(net));
+}
+
+static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = 0;
+}
+
+static int
+nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ /* You cannot delete the same rule twice */
+ if (nft_rule_is_active_next(ctx->net, rule)) {
+ nft_rule_deactivate_next(ctx->net, rule);
+ ctx->chain->use--;
+ return 0;
+ }
+ return -ENOENT;
+}
+
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_rule *rule)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+ if (trans == NULL)
+ return NULL;
+
+ nft_trans_rule(trans) = rule;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return trans;
+}
+
+static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ struct nft_trans *trans;
+ int err;
+
+ trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule);
+ if (trans == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_delrule_deactivate(ctx, rule);
+ if (err < 0) {
+ nft_trans_destroy(trans);
+ return err;
+ }
+
+ return 0;
+}
+
+static int nft_delrule_by_chain(struct nft_ctx *ctx)
+{
+ struct nft_rule *rule;
+ int err;
+
+ list_for_each_entry(rule, &ctx->chain->rules, list) {
+ err = nft_delrule(ctx, rule);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+/* Internal set flag */
+#define NFT_SET_INACTIVE (1 << 15)
+
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ set->flags |= NFT_SET_INACTIVE;
+ }
+ nft_trans_set(trans) = set;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
+{
+ int err;
+
+ err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&set->list);
+ ctx->table->use--;
+
+ return err;
+}
+
/*
* Tables
*/
@@ -207,9 +405,9 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
-static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table)
+static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -222,7 +420,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
@@ -250,8 +448,8 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table);
+ err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -290,7 +488,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_table_info(skb,
+ if (nf_tables_fill_table_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWTABLE,
@@ -309,9 +507,6 @@ done:
return skb->len;
}
-/* Internal table flags */
-#define NFT_TABLE_INACTIVE (1 << 15)
-
static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -345,7 +540,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
family, table);
if (err < 0)
@@ -443,21 +638,6 @@ err:
return ret;
}
-static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
- if (trans == NULL)
- return -ENOMEM;
-
- if (msg_type == NFT_MSG_NEWTABLE)
- ctx->table->flags |= NFT_TABLE_INACTIVE;
-
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return 0;
-}
-
static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -527,6 +707,67 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
return 0;
}
+static int nft_flush_table(struct nft_ctx *ctx)
+{
+ int err;
+ struct nft_chain *chain, *nc;
+ struct nft_set *set, *ns;
+
+ list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
+ ctx->chain = chain;
+
+ err = nft_delrule_by_chain(ctx);
+ if (err < 0)
+ goto out;
+
+ err = nft_delchain(ctx);
+ if (err < 0)
+ goto out;
+ }
+
+ list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
+ if (set->flags & NFT_SET_ANONYMOUS &&
+ !list_empty(&set->bindings))
+ continue;
+
+ err = nft_delset(ctx, set);
+ if (err < 0)
+ goto out;
+ }
+
+ err = nft_deltable(ctx);
+out:
+ return err;
+}
+
+static int nft_flush(struct nft_ctx *ctx, int family)
+{
+ struct nft_af_info *afi;
+ struct nft_table *table, *nt;
+ const struct nlattr * const *nla = ctx->nla;
+ int err = 0;
+
+ list_for_each_entry(afi, &ctx->net->nft.af_info, list) {
+ if (family != AF_UNSPEC && afi->family != family)
+ continue;
+
+ ctx->afi = afi;
+ list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ if (nla[NFTA_TABLE_NAME] &&
+ nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
+ continue;
+
+ ctx->table = table;
+
+ err = nft_flush_table(ctx);
+ if (err < 0)
+ goto out;
+ }
+ }
+out:
+ return err;
+}
+
static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -535,9 +776,13 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
struct nft_af_info *afi;
struct nft_table *table;
struct net *net = sock_net(skb->sk);
- int family = nfmsg->nfgen_family, err;
+ int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla);
+ if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+ return nft_flush(&ctx, family);
+
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
return PTR_ERR(afi);
@@ -547,16 +792,11 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(table);
if (table->flags & NFT_TABLE_INACTIVE)
return -ENOENT;
- if (table->use > 0)
- return -EBUSY;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
- err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
- if (err < 0)
- return err;
+ ctx.afi = afi;
+ ctx.table = table;
- list_del_rcu(&table->list);
- return 0;
+ return nft_flush_table(&ctx);
}
static void nf_tables_table_destroy(struct nft_ctx *ctx)
@@ -674,9 +914,9 @@ nla_put_failure:
return -ENOSPC;
}
-static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table,
+static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table,
const struct nft_chain *chain)
{
struct nlmsghdr *nlh;
@@ -690,7 +930,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
goto nla_put_failure;
@@ -748,8 +988,8 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -791,7 +1031,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_chain_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWCHAIN,
NLM_F_MULTI,
@@ -850,7 +1091,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
family, table, chain);
if (err < 0)
@@ -899,6 +1140,9 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
static void nft_chain_stats_replace(struct nft_base_chain *chain,
struct nft_stats __percpu *newstats)
{
+ if (newstats == NULL)
+ return;
+
if (chain->stats) {
struct nft_stats __percpu *oldstats =
nft_dereference(chain->stats);
@@ -910,21 +1154,6 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
rcu_assign_pointer(chain->stats, newstats);
}
-static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
- if (trans == NULL)
- return -ENOMEM;
-
- if (msg_type == NFT_MSG_NEWCHAIN)
- ctx->chain->flags |= NFT_CHAIN_INACTIVE;
-
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return 0;
-}
-
static void nf_tables_chain_destroy(struct nft_chain *chain)
{
BUG_ON(chain->use > 0);
@@ -1154,11 +1383,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
list_add_tail_rcu(&chain->list, &table->chains);
return 0;
err2:
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN) {
- nf_unregister_hooks(nft_base_chain(chain)->ops,
- afi->nops);
- }
+ nf_tables_unregister_hooks(table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
return err;
@@ -1175,7 +1400,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1196,13 +1420,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
return -EBUSY;
nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
- err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
- if (err < 0)
- return err;
- table->use--;
- list_del_rcu(&chain->list);
- return 0;
+ return nft_delchain(&ctx);
}
/*
@@ -1429,8 +1648,9 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
.len = NFT_USERDATA_MAXLEN },
};
-static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
+static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
const struct nft_rule *rule)
@@ -1450,7 +1670,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
goto nla_put_failure;
@@ -1506,8 +1726,8 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
@@ -1524,41 +1744,6 @@ err:
return err;
}
-static inline bool
-nft_rule_is_active(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & (1 << net->nft.gencursor)) == 0;
-}
-
-static inline int gencursor_next(struct net *net)
-{
- return net->nft.gencursor+1 == 1 ? 1 : 0;
-}
-
-static inline int
-nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & (1 << gencursor_next(net))) == 0;
-}
-
-static inline void
-nft_rule_activate_next(struct net *net, struct nft_rule *rule)
-{
- /* Now inactive, will be active in the future */
- rule->genmask = (1 << net->nft.gencursor);
-}
-
-static inline void
-nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
-{
- rule->genmask = (1 << gencursor_next(net));
-}
-
-static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
-{
- rule->genmask = 0;
-}
-
static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -1588,7 +1773,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
@@ -1654,7 +1839,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
family, table, chain, rule);
if (err < 0)
@@ -1684,21 +1869,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
- struct nft_rule *rule)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
- if (trans == NULL)
- return NULL;
-
- nft_trans_rule(trans) = rule;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-
- return trans;
-}
-
#define NFT_RULE_MAXEXPRS 128
static struct nft_expr_info *info;
@@ -1820,7 +1990,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
err = -ENOMEM;
goto err2;
}
- nft_rule_disactivate_next(net, old_rule);
+ nft_rule_deactivate_next(net, old_rule);
chain->use--;
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
@@ -1864,33 +2034,6 @@ err1:
return err;
}
-static int
-nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
-{
- /* You cannot delete the same rule twice */
- if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
- return -ENOMEM;
- nft_rule_disactivate_next(ctx->net, rule);
- ctx->chain->use--;
- return 0;
- }
- return -ENOENT;
-}
-
-static int nf_table_delrule_by_chain(struct nft_ctx *ctx)
-{
- struct nft_rule *rule;
- int err;
-
- list_for_each_entry(rule, &ctx->chain->rules, list) {
- err = nf_tables_delrule_one(ctx, rule);
- if (err < 0)
- return err;
- }
- return 0;
-}
-
static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -1929,14 +2072,14 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(rule))
return PTR_ERR(rule);
- err = nf_tables_delrule_one(&ctx, rule);
+ err = nft_delrule(&ctx, rule);
} else {
- err = nf_table_delrule_by_chain(&ctx);
+ err = nft_delrule_by_chain(&ctx);
}
} else {
list_for_each_entry(chain, &table->chains, list) {
ctx.chain = chain;
- err = nf_table_delrule_by_chain(&ctx);
+ err = nft_delrule_by_chain(&ctx);
if (err < 0)
break;
}
@@ -2180,7 +2323,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
@@ -2201,6 +2344,11 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (set->policy != NFT_SET_POL_PERFORMANCE) {
+ if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
+ goto nla_put_failure;
+ }
+
desc = nla_nest_start(skb, NFTA_SET_DESC);
if (desc == NULL)
goto nla_put_failure;
@@ -2247,80 +2395,7 @@ err:
return err;
}
-static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- const struct nft_set *set;
- unsigned int idx = 0, s_idx = cb->args[0];
-
- if (cb->args[1])
- return skb->len;
-
- rcu_read_lock();
- cb->seq = ctx->net->nft.base_seq;
-
- list_for_each_entry_rcu(set, &ctx->table->sets, list) {
- if (idx < s_idx)
- goto cont;
- if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
- NLM_F_MULTI) < 0) {
- cb->args[0] = idx;
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- cb->args[1] = 1;
-done:
- rcu_read_unlock();
- return skb->len;
-}
-
-static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- const struct nft_set *set;
- unsigned int idx, s_idx = cb->args[0];
- struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
-
- if (cb->args[1])
- return skb->len;
-
- rcu_read_lock();
- cb->seq = ctx->net->nft.base_seq;
-
- list_for_each_entry_rcu(table, &ctx->afi->tables, list) {
- if (cur_table) {
- if (cur_table != table)
- continue;
-
- cur_table = NULL;
- }
- ctx->table = table;
- idx = 0;
- list_for_each_entry_rcu(set, &ctx->table->sets, list) {
- if (idx < s_idx)
- goto cont;
- if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
- NLM_F_MULTI) < 0) {
- cb->args[0] = idx;
- cb->args[2] = (unsigned long) table;
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- }
- cb->args[1] = 1;
-done:
- rcu_read_unlock();
- return skb->len;
-}
-
-static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
- struct netlink_callback *cb)
+static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
@@ -2328,6 +2403,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
int cur_family = cb->args[3];
+ struct nft_ctx *ctx = cb->data, ctx_set;
if (cb->args[1])
return skb->len;
@@ -2336,28 +2412,34 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cb->seq = net->nft.base_seq;
list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+ if (ctx->afi && ctx->afi != afi)
+ continue;
+
if (cur_family) {
if (afi->family != cur_family)
continue;
cur_family = 0;
}
-
list_for_each_entry_rcu(table, &afi->tables, list) {
+ if (ctx->table && ctx->table != table)
+ continue;
+
if (cur_table) {
if (cur_table != table)
continue;
cur_table = NULL;
}
-
- ctx->table = table;
- ctx->afi = afi;
idx = 0;
- list_for_each_entry_rcu(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &table->sets, list) {
if (idx < s_idx)
goto cont;
- if (nf_tables_fill_set(skb, ctx, set,
+
+ ctx_set = *ctx;
+ ctx_set.table = table;
+ ctx_set.afi = afi;
+ if (nf_tables_fill_set(skb, &ctx_set, set,
NFT_MSG_NEWSET,
NLM_F_MULTI) < 0) {
cb->args[0] = idx;
@@ -2379,35 +2461,12 @@ done:
return skb->len;
}
-static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+static int nf_tables_dump_sets_done(struct netlink_callback *cb)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- struct nlattr *nla[NFTA_SET_MAX + 1];
- struct nft_ctx ctx;
- int err, ret;
-
- err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX,
- nft_set_policy);
- if (err < 0)
- return err;
-
- err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla);
- if (err < 0)
- return err;
-
- if (ctx.table == NULL) {
- if (ctx.afi == NULL)
- ret = nf_tables_dump_sets_all(&ctx, skb, cb);
- else
- ret = nf_tables_dump_sets_family(&ctx, skb, cb);
- } else
- ret = nf_tables_dump_sets_table(&ctx, skb, cb);
-
- return ret;
+ kfree(cb->data);
+ return 0;
}
-#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
-
static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2426,7 +2485,17 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_sets,
+ .done = nf_tables_dump_sets_done,
};
+ struct nft_ctx *ctx_dump;
+
+ ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL);
+ if (ctx_dump == NULL)
+ return -ENOMEM;
+
+ *ctx_dump = ctx;
+ c.data = ctx_dump;
+
return netlink_dump_start(nlsk, skb, nlh, &c);
}
@@ -2472,26 +2541,6 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
return 0;
}
-static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
- struct nft_set *set)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
- if (trans == NULL)
- return -ENOMEM;
-
- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
- nft_trans_set_id(trans) =
- ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
- set->flags |= NFT_SET_INACTIVE;
- }
- nft_trans_set(trans) = set;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-
- return 0;
-}
-
static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2625,6 +2674,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
+ set->policy = policy;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2685,13 +2735,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (!list_empty(&set->bindings))
return -EBUSY;
- err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
- if (err < 0)
- return err;
-
- list_del_rcu(&set->list);
- ctx.table->use--;
- return 0;
+ return nft_delset(&ctx, set);
}
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
@@ -2889,7 +2933,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx.afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
goto nla_put_failure;
@@ -2970,7 +3014,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
@@ -3150,6 +3194,9 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int rem, err = 0;
+ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
+ return -EINVAL;
+
err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
if (err < 0)
return err;
@@ -3208,16 +3255,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
goto err2;
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
- if (trans == NULL)
+ if (trans == NULL) {
+ err = -ENOMEM;
goto err2;
+ }
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-
- nft_data_uninit(&elem.key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(&elem.data, set->dtype);
-
+ return 0;
err2:
nft_data_uninit(&elem.key, desc.type);
err1:
@@ -3233,6 +3278,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int rem, err = 0;
+ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
+ return -EINVAL;
+
err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -3253,6 +3301,87 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
return err;
}
+static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
+
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct sk_buff *skb2;
+ int err;
+
+ if (nlmsg_report(nlh) &&
+ !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ goto err;
+
+ err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq);
+ if (err < 0) {
+ kfree_skb(skb2);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
+ NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
+err:
+ if (err < 0) {
+ nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+ err);
+ }
+ return err;
+}
+
+static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ struct net *net = sock_net(skb->sk);
+ struct sk_buff *skb2;
+ int err;
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
.call_batch = nf_tables_newtable,
@@ -3329,6 +3458,9 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
+ [NFT_MSG_GETGEN] = {
+ .call = nf_tables_getgen,
+ },
};
static void nft_chain_commit_update(struct nft_trans *trans)
@@ -3380,7 +3512,7 @@ static int nf_tables_commit(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
struct nft_trans *trans, *next;
- struct nft_set *set;
+ struct nft_trans_elem *te;
/* Bump generation counter, invalidate any dump in progress */
while (++net->nft.base_seq == 0);
@@ -3422,11 +3554,9 @@ static int nf_tables_commit(struct sk_buff *skb)
break;
case NFT_MSG_DELCHAIN:
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
- trans->ctx.chain->flags & NFT_BASE_CHAIN) {
- nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
- trans->ctx.afi->nops);
- }
+ nf_tables_unregister_hooks(trans->ctx.table,
+ trans->ctx.chain,
+ trans->ctx.afi->nops);
break;
case NFT_MSG_NEWRULE:
nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
@@ -3466,13 +3596,17 @@ static int nf_tables_commit(struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
- nf_tables_setelem_notify(&trans->ctx,
- nft_trans_elem_set(trans),
- &nft_trans_elem(trans),
+ te = (struct nft_trans_elem *)trans->data;
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
NFT_MSG_DELSETELEM, 0);
- set = nft_trans_elem_set(trans);
- set->ops->get(set, &nft_trans_elem(trans));
- set->ops->remove(set, &nft_trans_elem(trans));
+ te->set->ops->get(te->set, &te->elem);
+ te->set->ops->remove(te->set, &te->elem);
+ nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
+ if (te->elem.flags & NFT_SET_MAP) {
+ nft_data_uninit(&te->elem.data,
+ te->set->dtype);
+ }
nft_trans_destroy(trans);
break;
}
@@ -3484,6 +3618,8 @@ static int nf_tables_commit(struct sk_buff *skb)
call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
}
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+
return 0;
}
@@ -3545,11 +3681,9 @@ static int nf_tables_abort(struct sk_buff *skb)
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
- trans->ctx.chain->flags & NFT_BASE_CHAIN) {
- nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
- trans->ctx.afi->nops);
- }
+ nf_tables_unregister_hooks(trans->ctx.table,
+ trans->ctx.chain,
+ trans->ctx.afi->nops);
}
break;
case NFT_MSG_DELCHAIN:
@@ -4029,6 +4163,7 @@ static void __exit nf_tables_module_exit(void)
{
unregister_pernet_subsys(&nf_tables_net_ops);
nfnetlink_subsys_unregister(&nf_tables_subsys);
+ rcu_barrier();
nf_tables_core_module_exit();
kfree(info);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c138b8fbe280..6c5a915cfa75 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -222,6 +222,51 @@ replay:
}
}
+struct nfnl_err {
+ struct list_head head;
+ struct nlmsghdr *nlh;
+ int err;
+};
+
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+{
+ struct nfnl_err *nfnl_err;
+
+ nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL);
+ if (nfnl_err == NULL)
+ return -ENOMEM;
+
+ nfnl_err->nlh = nlh;
+ nfnl_err->err = err;
+ list_add_tail(&nfnl_err->head, list);
+
+ return 0;
+}
+
+static void nfnl_err_del(struct nfnl_err *nfnl_err)
+{
+ list_del(&nfnl_err->head);
+ kfree(nfnl_err);
+}
+
+static void nfnl_err_reset(struct list_head *err_list)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head)
+ nfnl_err_del(nfnl_err);
+}
+
+static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head) {
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
+ nfnl_err_del(nfnl_err);
+ }
+}
+
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
u_int16_t subsys_id)
{
@@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
bool success = true, done = false;
+ static LIST_HEAD(err_list);
int err;
if (subsys_id >= NFNL_SUBSYS_COUNT)
@@ -287,6 +333,7 @@ replay:
type = nlh->nlmsg_type;
if (type == NFNL_MSG_BATCH_BEGIN) {
/* Malformed: Batch begin twice */
+ nfnl_err_reset(&err_list);
success = false;
goto done;
} else if (type == NFNL_MSG_BATCH_END) {
@@ -333,7 +380,8 @@ replay:
* original skb.
*/
if (err == -EAGAIN) {
- ss->abort(skb);
+ nfnl_err_reset(&err_list);
+ ss->abort(oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
goto replay;
@@ -341,11 +389,24 @@ replay:
}
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+ /* Errors are delivered once the full batch has been
+ * processed, this avoids that the same error is
+ * reported several times when replaying the batch.
+ */
+ if (nfnl_err_add(&err_list, nlh, err) < 0) {
+ /* We failed to enqueue an error, reset the
+ * list of errors and send OOM to userspace
+ * pointing to the batch header.
+ */
+ nfnl_err_reset(&err_list);
+ netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ success = false;
+ goto done;
+ }
/* We don't stop processing the batch on errors, thus,
* userspace gets all the errors that the batch
* triggers.
*/
- netlink_ack(skb, nlh, err);
if (err)
success = false;
}
@@ -357,10 +418,11 @@ ack:
}
done:
if (success && done)
- ss->commit(skb);
+ ss->commit(oskb);
else
- ss->abort(skb);
+ ss->abort(oskb);
+ nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
}
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 2baa125c2e8d..c18af2f63eef 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -40,7 +40,13 @@ struct nf_acct {
char data[0];
};
+struct nfacct_filter {
+ u32 value;
+ u32 mask;
+};
+
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
static int
nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
@@ -77,7 +83,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
smp_mb__before_atomic();
/* reset overquota flag if quota is enabled. */
if ((matching->flags & NFACCT_F_QUOTA))
- clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
+ clear_bit(NFACCT_OVERQUOTA_BIT,
+ &matching->flags);
return 0;
}
return -EBUSY;
@@ -129,6 +136,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
u64 pkts, bytes;
+ u32 old_flags;
event |= NFNL_SUBSYS_ACCT << 8;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -143,12 +151,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (nla_put_string(skb, NFACCT_NAME, acct->name))
goto nla_put_failure;
+ old_flags = acct->flags;
if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
pkts = atomic64_xchg(&acct->pkts, 0);
bytes = atomic64_xchg(&acct->bytes, 0);
smp_mb__before_atomic();
if (acct->flags & NFACCT_F_QUOTA)
- clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
+ clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags);
} else {
pkts = atomic64_read(&acct->pkts);
bytes = atomic64_read(&acct->bytes);
@@ -160,7 +169,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (acct->flags & NFACCT_F_QUOTA) {
u64 *quota = (u64 *)acct->data;
- if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+ if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) ||
nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
goto nla_put_failure;
}
@@ -177,6 +186,7 @@ static int
nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nf_acct *cur, *last;
+ const struct nfacct_filter *filter = cb->data;
if (cb->args[2])
return 0;
@@ -193,6 +203,10 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
last = NULL;
}
+
+ if (filter && (cur->flags & filter->mask) != filter->value)
+ continue;
+
if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
@@ -207,6 +221,38 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+static int nfnl_acct_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
+static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
+ [NFACCT_FILTER_MASK] = { .type = NLA_U32 },
+ [NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
+};
+
+static struct nfacct_filter *
+nfacct_filter_alloc(const struct nlattr * const attr)
+{
+ struct nfacct_filter *filter;
+ struct nlattr *tb[NFACCT_FILTER_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
+ if (!filter)
+ return ERR_PTR(-ENOMEM);
+
+ filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
+ filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
+
+ return filter;
+}
+
static int
nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
@@ -218,7 +264,18 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_acct_dump,
+ .done = nfnl_acct_done,
};
+
+ if (tb[NFACCT_FILTER]) {
+ struct nfacct_filter *filter;
+
+ filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
+ if (IS_ERR(filter))
+ return PTR_ERR(filter);
+
+ c.data = filter;
+ }
return netlink_dump_start(nfnl, skb, nlh, &c);
}
@@ -310,6 +367,7 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
[NFACCT_PKTS] = { .type = NLA_U64 },
[NFACCT_FLAGS] = { .type = NLA_U32 },
[NFACCT_QUOTA] = { .type = NLA_U64 },
+ [NFACCT_FILTER] = {.type = NLA_NESTED },
};
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -412,7 +470,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
ret = now > *quota;
if (now >= *quota &&
- !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
+ !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
nfnl_overquota_report(nfacct);
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d292c8d286eb..b1e3a0579416 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -36,7 +36,7 @@
#include <linux/atomic.h>
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
#endif
@@ -429,7 +429,7 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
if (indev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
@@ -460,7 +460,7 @@ __build_packet_message(struct nfnl_log_net *log,
}
if (outdev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
@@ -640,7 +640,7 @@ nfulnl_log_packet(struct net *net,
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
@@ -773,6 +773,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
static struct nf_logger nfulnl_logger __read_mostly = {
.name = "nfnetlink_log",
+ .type = NF_LOG_TYPE_ULOG,
.logfn = &nfulnl_log_packet,
.me = THIS_MODULE,
};
@@ -1105,6 +1106,9 @@ MODULE_DESCRIPTION("netfilter userspace logging");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
+MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
+MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
module_init(nfnetlink_log_init);
module_exit(nfnetlink_log_fini);
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 108120f216b1..a82077d9f59b 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -36,7 +36,7 @@
#include <linux/atomic.h>
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
#endif
@@ -302,7 +302,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
@@ -380,7 +380,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
indev = entry->indev;
if (indev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
goto nla_put_failure;
#else
@@ -410,7 +410,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
}
if (outdev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
goto nla_put_failure;
#else
@@ -569,7 +569,7 @@ nf_queue_entry_dup(struct nf_queue_entry *e)
return NULL;
}
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* When called from bridge netfilter, skb->data must point to MAC header
* before calling skb_gso_segment(). Else, original MAC header is lost
* and segmented skbs will be sent to wrong destination.
@@ -763,7 +763,7 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
if (entry->outdev)
if (entry->outdev->ifindex == ifindex)
return 1;
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
if (entry->skb->nf_bridge->physindev &&
entry->skb->nf_bridge->physindev->ifindex == ifindex)
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 1840989092ed..7e2683c8a44a 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -101,26 +101,12 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
static void target_compat_from_user(struct xt_target *t, void *in, void *out)
{
-#ifdef CONFIG_COMPAT
- if (t->compat_from_user) {
- int pad;
-
- t->compat_from_user(out, in);
- pad = XT_ALIGN(t->targetsize) - t->targetsize;
- if (pad > 0)
- memset(out + t->targetsize, 0, pad);
- } else
-#endif
- memcpy(out, in, XT_ALIGN(t->targetsize));
-}
+ int pad;
-static inline int nft_compat_target_offset(struct xt_target *target)
-{
-#ifdef CONFIG_COMPAT
- return xt_compat_target_offset(target);
-#else
- return 0;
-#endif
+ memcpy(out, in, t->targetsize);
+ pad = XT_ALIGN(t->targetsize) - t->targetsize;
+ if (pad > 0)
+ memset(out + t->targetsize, 0, pad);
}
static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = {
@@ -208,34 +194,6 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
module_put(target->me);
}
-static int
-target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in)
-{
- int ret;
-
-#ifdef CONFIG_COMPAT
- if (t->compat_to_user) {
- mm_segment_t old_fs;
- void *out;
-
- out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC);
- if (out == NULL)
- return -ENOMEM;
-
- /* We want to reuse existing compat_to_user */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- t->compat_to_user(out, in);
- set_fs(old_fs);
- ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out);
- kfree(out);
- } else
-#endif
- ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in);
-
- return ret;
-}
-
static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct xt_target *target = expr->ops->data;
@@ -243,7 +201,7 @@ static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) ||
nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) ||
- target_dump_info(skb, target, info))
+ nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info))
goto nla_put_failure;
return 0;
@@ -341,17 +299,12 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
static void match_compat_from_user(struct xt_match *m, void *in, void *out)
{
-#ifdef CONFIG_COMPAT
- if (m->compat_from_user) {
- int pad;
-
- m->compat_from_user(out, in);
- pad = XT_ALIGN(m->matchsize) - m->matchsize;
- if (pad > 0)
- memset(out + m->matchsize, 0, pad);
- } else
-#endif
- memcpy(out, in, XT_ALIGN(m->matchsize));
+ int pad;
+
+ memcpy(out, in, m->matchsize);
+ pad = XT_ALIGN(m->matchsize) - m->matchsize;
+ if (pad > 0)
+ memset(out + m->matchsize, 0, pad);
}
static int
@@ -404,43 +357,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
module_put(match->me);
}
-static int
-match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in)
-{
- int ret;
-
-#ifdef CONFIG_COMPAT
- if (m->compat_to_user) {
- mm_segment_t old_fs;
- void *out;
-
- out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC);
- if (out == NULL)
- return -ENOMEM;
-
- /* We want to reuse existing compat_to_user */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- m->compat_to_user(out, in);
- set_fs(old_fs);
- ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out);
- kfree(out);
- } else
-#endif
- ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in);
-
- return ret;
-}
-
-static inline int nft_compat_match_offset(struct xt_match *match)
-{
-#ifdef CONFIG_COMPAT
- return xt_compat_match_offset(match);
-#else
- return 0;
-#endif
-}
-
static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
void *info = nft_expr_priv(expr);
@@ -448,7 +364,7 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) ||
- match_dump_info(skb, match, info))
+ nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info))
goto nla_put_failure;
return 0;
@@ -643,8 +559,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-ENOMEM);
nft_match->ops.type = &nft_match_type;
- nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) +
- nft_compat_match_offset(match));
+ nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
nft_match->ops.eval = nft_match_eval;
nft_match->ops.init = nft_match_init;
nft_match->ops.destroy = nft_match_destroy;
@@ -714,8 +629,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-ENOMEM);
nft_target->ops.type = &nft_target_type;
- nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) +
- nft_compat_target_offset(target));
+ nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
nft_target->ops.eval = nft_target_eval;
nft_target->ops.init = nft_target_init;
nft_target->ops.destroy = nft_target_destroy;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 4080ed6a072b..8892b7b6184a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,209 +15,40 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
-#include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#define NFT_HASH_MIN_SIZE 4UL
-
-struct nft_hash {
- struct nft_hash_table __rcu *tbl;
-};
-
-struct nft_hash_table {
- unsigned int size;
- struct nft_hash_elem __rcu *buckets[];
-};
+/* We target a hash table size of 4, element hint is 75% of final size */
+#define NFT_HASH_ELEMENT_HINT 3
struct nft_hash_elem {
- struct nft_hash_elem __rcu *next;
+ struct rhash_head node;
struct nft_data key;
struct nft_data data[];
};
-#define nft_hash_for_each_entry(i, head) \
- for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
-#define nft_hash_for_each_entry_rcu(i, head) \
- for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
-
-static u32 nft_hash_rnd __read_mostly;
-static bool nft_hash_rnd_initted __read_mostly;
-
-static unsigned int nft_hash_data(const struct nft_data *data,
- unsigned int hsize, unsigned int len)
-{
- unsigned int h;
-
- h = jhash(data->data, len, nft_hash_rnd);
- return h & (hsize - 1);
-}
-
static bool nft_hash_lookup(const struct nft_set *set,
const struct nft_data *key,
struct nft_data *data)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
+ const struct rhashtable *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
- unsigned int h;
-
- h = nft_hash_data(key, tbl->size, set->klen);
- nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
- if (nft_data_cmp(&he->key, key, set->klen))
- continue;
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(data, he->data);
- return true;
- }
- return false;
-}
-
-static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
-{
- kvfree(tbl);
-}
-
-static unsigned int nft_hash_tbl_size(unsigned int nelem)
-{
- return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
-}
-
-static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
-{
- struct nft_hash_table *tbl;
- size_t size;
-
- size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
- if (tbl == NULL)
- tbl = vzalloc(size);
- if (tbl == NULL)
- return NULL;
- tbl->size = nbuckets;
-
- return tbl;
-}
-
-static void nft_hash_chain_unzip(const struct nft_set *set,
- const struct nft_hash_table *ntbl,
- struct nft_hash_table *tbl, unsigned int n)
-{
- struct nft_hash_elem *he, *last, *next;
- unsigned int h;
-
- he = nft_dereference(tbl->buckets[n]);
- if (he == NULL)
- return;
- h = nft_hash_data(&he->key, ntbl->size, set->klen);
-
- /* Find last element of first chain hashing to bucket h */
- last = he;
- nft_hash_for_each_entry(he, he->next) {
- if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
- break;
- last = he;
- }
- /* Unlink first chain from the old table */
- RCU_INIT_POINTER(tbl->buckets[n], last->next);
+ he = rhashtable_lookup(priv, key);
+ if (he && set->flags & NFT_SET_MAP)
+ nft_data_copy(data, he->data);
- /* If end of chain reached, done */
- if (he == NULL)
- return;
-
- /* Find first element of second chain hashing to bucket h */
- next = NULL;
- nft_hash_for_each_entry(he, he->next) {
- if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
- continue;
- next = he;
- break;
- }
-
- /* Link the two chains */
- RCU_INIT_POINTER(last->next, next);
-}
-
-static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
-{
- struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
- struct nft_hash_elem *he;
- unsigned int i, h;
- bool complete;
-
- ntbl = nft_hash_tbl_alloc(tbl->size * 2);
- if (ntbl == NULL)
- return -ENOMEM;
-
- /* Link new table's buckets to first element in the old table
- * hashing to the new bucket.
- */
- for (i = 0; i < ntbl->size; i++) {
- h = i < tbl->size ? i : i - tbl->size;
- nft_hash_for_each_entry(he, tbl->buckets[h]) {
- if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
- continue;
- RCU_INIT_POINTER(ntbl->buckets[i], he);
- break;
- }
- }
-
- /* Publish new table */
- rcu_assign_pointer(priv->tbl, ntbl);
-
- /* Unzip interleaved hash chains */
- do {
- /* Wait for readers to use new table/unzipped chains */
- synchronize_rcu();
-
- complete = true;
- for (i = 0; i < tbl->size; i++) {
- nft_hash_chain_unzip(set, ntbl, tbl, i);
- if (tbl->buckets[i] != NULL)
- complete = false;
- }
- } while (!complete);
-
- nft_hash_tbl_free(tbl);
- return 0;
-}
-
-static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
-{
- struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
- struct nft_hash_elem __rcu **pprev;
- unsigned int i;
-
- ntbl = nft_hash_tbl_alloc(tbl->size / 2);
- if (ntbl == NULL)
- return -ENOMEM;
-
- for (i = 0; i < ntbl->size; i++) {
- ntbl->buckets[i] = tbl->buckets[i];
-
- for (pprev = &ntbl->buckets[i]; *pprev != NULL;
- pprev = &nft_dereference(*pprev)->next)
- ;
- RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
- }
-
- /* Publish new table */
- rcu_assign_pointer(priv->tbl, ntbl);
- synchronize_rcu();
-
- nft_hash_tbl_free(tbl);
- return 0;
+ return !!he;
}
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct rhashtable *priv = nft_set_priv(set);
struct nft_hash_elem *he;
- unsigned int size, h;
+ unsigned int size;
if (elem->flags != 0)
return -EINVAL;
@@ -234,13 +65,7 @@ static int nft_hash_insert(const struct nft_set *set,
if (set->flags & NFT_SET_MAP)
nft_data_copy(he->data, &elem->data);
- h = nft_hash_data(&he->key, tbl->size, set->klen);
- RCU_INIT_POINTER(he->next, tbl->buckets[h]);
- rcu_assign_pointer(tbl->buckets[h], he);
-
- /* Expand table when exceeding 75% load */
- if (set->nelems + 1 > tbl->size / 4 * 3)
- nft_hash_tbl_expand(set, priv);
+ rhashtable_insert(priv, &he->node, GFP_KERNEL);
return 0;
}
@@ -257,36 +82,31 @@ static void nft_hash_elem_destroy(const struct nft_set *set,
static void nft_hash_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_table *tbl = nft_dereference(priv->tbl);
- struct nft_hash_elem *he, __rcu **pprev;
+ struct rhashtable *priv = nft_set_priv(set);
+ struct rhash_head *he, __rcu **pprev;
pprev = elem->cookie;
- he = nft_dereference((*pprev));
+ he = rht_dereference((*pprev), priv);
+
+ rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL);
- RCU_INIT_POINTER(*pprev, he->next);
synchronize_rcu();
kfree(he);
-
- /* Shrink table beneath 30% load */
- if (set->nelems - 1 < tbl->size * 3 / 10 &&
- tbl->size > NFT_HASH_MIN_SIZE)
- nft_hash_tbl_shrink(set, priv);
}
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
- struct nft_hash_elem __rcu * const *pprev;
+ const struct rhashtable *priv = nft_set_priv(set);
+ const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv);
+ struct rhash_head __rcu * const *pprev;
struct nft_hash_elem *he;
- unsigned int h;
+ u32 h;
- h = nft_hash_data(&elem->key, tbl->size, set->klen);
+ h = rhashtable_hashfn(priv, &elem->key, set->klen);
pprev = &tbl->buckets[h];
- nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ rht_for_each_entry_rcu(he, tbl->buckets[h], node) {
if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
- pprev = &he->next;
+ pprev = &he->node.next;
continue;
}
@@ -302,14 +122,15 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ const struct rhashtable *priv = nft_set_priv(set);
+ const struct bucket_table *tbl;
const struct nft_hash_elem *he;
struct nft_set_elem elem;
unsigned int i;
+ tbl = rht_dereference_rcu(priv->tbl, priv);
for (i = 0; i < tbl->size; i++) {
- nft_hash_for_each_entry(he, tbl->buckets[i]) {
+ rht_for_each_entry_rcu(he, tbl->buckets[i], node) {
if (iter->count < iter->skip)
goto cont;
@@ -329,48 +150,48 @@ cont:
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
- return sizeof(struct nft_hash);
+ return sizeof(struct rhashtable);
+}
+
+static int lockdep_nfnl_lock_is_held(void)
+{
+ return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES);
}
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_table *tbl;
- unsigned int size;
+ struct rhashtable *priv = nft_set_priv(set);
+ struct rhashtable_params params = {
+ .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .key_offset = offsetof(struct nft_hash_elem, key),
+ .key_len = set->klen,
+ .hashfn = jhash,
+ .grow_decision = rht_grow_above_75,
+ .shrink_decision = rht_shrink_below_30,
+ .mutex_is_held = lockdep_nfnl_lock_is_held,
+ };
- if (unlikely(!nft_hash_rnd_initted)) {
- get_random_bytes(&nft_hash_rnd, 4);
- nft_hash_rnd_initted = true;
- }
-
- size = NFT_HASH_MIN_SIZE;
- if (desc->size)
- size = nft_hash_tbl_size(desc->size);
-
- tbl = nft_hash_tbl_alloc(size);
- if (tbl == NULL)
- return -ENOMEM;
- RCU_INIT_POINTER(priv->tbl, tbl);
- return 0;
+ return rhashtable_init(priv, &params);
}
static void nft_hash_destroy(const struct nft_set *set)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ const struct rhashtable *priv = nft_set_priv(set);
+ const struct bucket_table *tbl = priv->tbl;
struct nft_hash_elem *he, *next;
unsigned int i;
for (i = 0; i < tbl->size; i++) {
- for (he = nft_dereference(tbl->buckets[i]); he != NULL;
- he = next) {
- next = nft_dereference(he->next);
+ for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node);
+ he != NULL; he = next) {
+ next = rht_entry(he->node.next, struct nft_hash_elem, node);
nft_hash_elem_destroy(set, he);
}
}
- kfree(tbl);
+ rhashtable_destroy(priv);
}
static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -383,8 +204,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
if (desc->size) {
- est->size = sizeof(struct nft_hash) +
- nft_hash_tbl_size(desc->size) *
+ est->size = sizeof(struct rhashtable) +
+ roundup_pow_of_two(desc->size * 4 / 3) *
sizeof(struct nft_hash_elem *) +
desc->size * esize;
} else {
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 10cfb156cdf4..bde05f28cf14 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -41,6 +42,8 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
[NFTA_LOG_PREFIX] = { .type = NLA_STRING },
[NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
[NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
+ [NFTA_LOG_LEVEL] = { .type = NLA_U32 },
+ [NFTA_LOG_FLAGS] = { .type = NLA_U32 },
};
static int nft_log_init(const struct nft_ctx *ctx,
@@ -50,6 +53,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nft_log *priv = nft_expr_priv(expr);
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
+ int ret;
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
@@ -57,30 +61,74 @@ static int nft_log_init(const struct nft_ctx *ctx,
if (priv->prefix == NULL)
return -ENOMEM;
nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
- } else
+ } else {
priv->prefix = (char *)nft_log_null_prefix;
+ }
- li->type = NF_LOG_TYPE_ULOG;
+ li->type = NF_LOG_TYPE_LOG;
+ if (tb[NFTA_LOG_LEVEL] != NULL &&
+ tb[NFTA_LOG_GROUP] != NULL)
+ return -EINVAL;
if (tb[NFTA_LOG_GROUP] != NULL)
+ li->type = NF_LOG_TYPE_ULOG;
+
+ switch (li->type) {
+ case NF_LOG_TYPE_LOG:
+ if (tb[NFTA_LOG_LEVEL] != NULL) {
+ li->u.log.level =
+ ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
+ } else {
+ li->u.log.level = 4;
+ }
+ if (tb[NFTA_LOG_FLAGS] != NULL) {
+ li->u.log.logflags =
+ ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+ }
+ break;
+ case NF_LOG_TYPE_ULOG:
li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+ if (tb[NFTA_LOG_SNAPLEN] != NULL) {
+ li->u.ulog.copy_len =
+ ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+ }
+ if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+ li->u.ulog.qthreshold =
+ ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+ }
+ break;
+ }
- if (tb[NFTA_LOG_SNAPLEN] != NULL)
- li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
- if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
- li->u.ulog.qthreshold =
- ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+ if (ctx->afi->family == NFPROTO_INET) {
+ ret = nf_logger_find_get(NFPROTO_IPV4, li->type);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_logger_find_get(NFPROTO_IPV6, li->type);
+ if (ret < 0) {
+ nf_logger_put(NFPROTO_IPV4, li->type);
+ return ret;
+ }
+ return 0;
}
- return 0;
+ return nf_logger_find_get(ctx->afi->family, li->type);
}
static void nft_log_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_log *priv = nft_expr_priv(expr);
+ struct nf_loginfo *li = &priv->loginfo;
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
+
+ if (ctx->afi->family == NFPROTO_INET) {
+ nf_logger_put(NFPROTO_IPV4, li->type);
+ nf_logger_put(NFPROTO_IPV6, li->type);
+ } else {
+ nf_logger_put(ctx->afi->family, li->type);
+ }
}
static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -91,17 +139,33 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (priv->prefix != nft_log_null_prefix)
if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
goto nla_put_failure;
- if (li->u.ulog.group)
- if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
- goto nla_put_failure;
- if (li->u.ulog.copy_len)
- if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
- htonl(li->u.ulog.copy_len)))
+ switch (li->type) {
+ case NF_LOG_TYPE_LOG:
+ if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level)))
goto nla_put_failure;
- if (li->u.ulog.qthreshold)
- if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
- htons(li->u.ulog.qthreshold)))
+
+ if (li->u.log.logflags) {
+ if (nla_put_be32(skb, NFTA_LOG_FLAGS,
+ htonl(li->u.log.logflags)))
+ goto nla_put_failure;
+ }
+ break;
+ case NF_LOG_TYPE_ULOG:
+ if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
goto nla_put_failure;
+
+ if (li->u.ulog.copy_len) {
+ if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+ htonl(li->u.ulog.copy_len)))
+ goto nla_put_failure;
+ }
+ if (li->u.ulog.qthreshold) {
+ if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+ htons(li->u.ulog.qthreshold)))
+ goto nla_put_failure;
+ }
+ break;
+ }
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
new file mode 100644
index 000000000000..6637bab00567
--- /dev/null
+++ b/net/netfilter/nft_masq.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nft_masq.h>
+
+const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
+ [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+};
+EXPORT_SYMBOL_GPL(nft_masq_policy);
+
+int nft_masq_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_MASQ_FLAGS] == NULL)
+ return 0;
+
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_masq_init);
+
+int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_masq *priv = nft_expr_priv(expr);
+
+ if (priv->flags == 0)
+ return 0;
+
+ if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nft_masq_dump);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 852b178c6ae7..1e7c076ca63a 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -14,6 +14,10 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/smp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
@@ -124,6 +128,43 @@ void nft_meta_get_eval(const struct nft_expr *expr,
dest->data[0] = skb->secmark;
break;
#endif
+ case NFT_META_PKTTYPE:
+ if (skb->pkt_type != PACKET_LOOPBACK) {
+ dest->data[0] = skb->pkt_type;
+ break;
+ }
+
+ switch (pkt->ops->pf) {
+ case NFPROTO_IPV4:
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+ dest->data[0] = PACKET_MULTICAST;
+ else
+ dest->data[0] = PACKET_BROADCAST;
+ break;
+ case NFPROTO_IPV6:
+ if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
+ dest->data[0] = PACKET_MULTICAST;
+ else
+ dest->data[0] = PACKET_BROADCAST;
+ break;
+ default:
+ WARN_ON(1);
+ goto err;
+ }
+ break;
+ case NFT_META_CPU:
+ dest->data[0] = smp_processor_id();
+ break;
+ case NFT_META_IIFGROUP:
+ if (in == NULL)
+ goto err;
+ dest->data[0] = in->group;
+ break;
+ case NFT_META_OIFGROUP:
+ if (out == NULL)
+ goto err;
+ dest->data[0] = out->group;
+ break;
default:
WARN_ON(1);
goto err;
@@ -195,6 +236,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
+ case NFT_META_PKTTYPE:
+ case NFT_META_CPU:
+ case NFT_META_IIFGROUP:
+ case NFT_META_OIFGROUP:
break;
default:
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 79ff58cd36dc..799550b476fb 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -33,6 +33,7 @@ struct nft_nat {
enum nft_registers sreg_proto_max:8;
enum nf_nat_manip_type type:8;
u8 family;
+ u16 flags;
};
static void nft_nat_eval(const struct nft_expr *expr,
@@ -71,6 +72,8 @@ static void nft_nat_eval(const struct nft_expr *expr,
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
+ range.flags |= priv->flags;
+
data[NFT_REG_VERDICT].verdict =
nf_nat_setup_info(ct, &range, priv->type);
}
@@ -82,6 +85,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
[NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
+ [NFTA_NAT_FLAGS] = { .type = NLA_U32 },
};
static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
@@ -149,6 +153,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
} else
priv->sreg_proto_max = priv->sreg_proto_min;
+ if (tb[NFTA_NAT_FLAGS]) {
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+ }
+
return 0;
}
@@ -183,6 +193,12 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
htonl(priv->sreg_proto_max)))
goto nla_put_failure;
}
+
+ if (priv->flags != 0) {
+ if (nla_put_be32(skb, NFTA_NAT_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e1836ff88199..46214f245665 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_rbtree_elem_destroy(set, rbe);
}
- spin_unlock_bh(&nft_rbtree_lock);
}
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index f3448c296446..57d3e1af5630 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -17,6 +17,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_reject.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
[NFTA_REJECT_TYPE] = { .type = NLA_U32 },
@@ -70,5 +72,38 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_reject_dump);
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
+ [NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH,
+ [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH,
+ [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH,
+ [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMP_PKT_FILTERED,
+};
+
+int nft_reject_icmp_code(u8 code)
+{
+ BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+
+ return icmp_code_v4[code];
+}
+
+EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
+
+
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
+ [NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE,
+ [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH,
+ [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH,
+ [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMPV6_ADM_PROHIBITED,
+};
+
+int nft_reject_icmpv6_code(u8 code)
+{
+ BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+
+ return icmp_code_v6[code];
+}
+
+EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index b718a52a4654..7b5f9d58680a 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -14,17 +14,103 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
static void nft_reject_inet_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1],
const struct nft_pktinfo *pkt)
{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
switch (pkt->ops->pf) {
case NFPROTO_IPV4:
- return nft_reject_ipv4_eval(expr, data, pkt);
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach(pkt->skb,
+ nft_reject_icmp_code(priv->icmp_code));
+ break;
+ }
+ break;
case NFPROTO_IPV6:
- return nft_reject_ipv6_eval(expr, data, pkt);
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach6(net, pkt->skb,
+ nft_reject_icmpv6_code(priv->icmp_code),
+ pkt->ops->hooknum);
+ break;
+ }
+ break;
+ }
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static int nft_reject_inet_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ int icmp_code;
+
+ if (tb[NFTA_REJECT_TYPE] == NULL)
+ return -EINVAL;
+
+ priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+ return -EINVAL;
+
+ icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+ if (priv->type == NFT_REJECT_ICMPX_UNREACH &&
+ icmp_code > NFT_REJECT_ICMPX_MAX)
+ return -EINVAL;
+
+ priv->icmp_code = icmp_code;
+ break;
+ case NFT_REJECT_TCP_RST:
+ break;
+ default:
+ return -EINVAL;
}
+ return 0;
+}
+
+static int nft_reject_inet_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_reject *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+ goto nla_put_failure;
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+ goto nla_put_failure;
+ break;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
}
static struct nft_expr_type nft_reject_inet_type;
@@ -32,8 +118,8 @@ static const struct nft_expr_ops nft_reject_inet_ops = {
.type = &nft_reject_inet_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
.eval = nft_reject_inet_eval,
- .init = nft_reject_init,
- .dump = nft_reject_dump,
+ .init = nft_reject_inet_init,
+ .dump = nft_reject_inet_dump,
};
static struct nft_expr_type nft_reject_inet_type __read_mostly = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 227aa11e8409..133eb4772f12 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -71,18 +71,14 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
static const unsigned int xt_jumpstack_multiplier = 2;
/* Registration hooks for targets. */
-int
-xt_register_target(struct xt_target *target)
+int xt_register_target(struct xt_target *target)
{
u_int8_t af = target->family;
- int ret;
- ret = mutex_lock_interruptible(&xt[af].mutex);
- if (ret != 0)
- return ret;
+ mutex_lock(&xt[af].mutex);
list_add(&target->list, &xt[af].target);
mutex_unlock(&xt[af].mutex);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(xt_register_target);
@@ -125,20 +121,14 @@ xt_unregister_targets(struct xt_target *target, unsigned int n)
}
EXPORT_SYMBOL(xt_unregister_targets);
-int
-xt_register_match(struct xt_match *match)
+int xt_register_match(struct xt_match *match)
{
u_int8_t af = match->family;
- int ret;
-
- ret = mutex_lock_interruptible(&xt[af].mutex);
- if (ret != 0)
- return ret;
+ mutex_lock(&xt[af].mutex);
list_add(&match->list, &xt[af].match);
mutex_unlock(&xt[af].mutex);
-
- return ret;
+ return 0;
}
EXPORT_SYMBOL(xt_register_match);
@@ -194,9 +184,7 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
struct xt_match *m;
int err = -ENOENT;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
if (m->revision == revision) {
@@ -239,9 +227,7 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
struct xt_target *t;
int err = -ENOENT;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision == revision) {
@@ -323,10 +309,7 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
{
int have_rev, best = -1;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0) {
- *err = -EINTR;
- return 1;
- }
+ mutex_lock(&xt[af].mutex);
if (target == 1)
have_rev = target_revfn(af, name, revision, &best);
else
@@ -711,28 +694,15 @@ void xt_free_table_info(struct xt_table_info *info)
{
int cpu;
- for_each_possible_cpu(cpu) {
- if (info->size <= PAGE_SIZE)
- kfree(info->entries[cpu]);
- else
- vfree(info->entries[cpu]);
- }
+ for_each_possible_cpu(cpu)
+ kvfree(info->entries[cpu]);
if (info->jumpstack != NULL) {
- if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
- for_each_possible_cpu(cpu)
- vfree(info->jumpstack[cpu]);
- } else {
- for_each_possible_cpu(cpu)
- kfree(info->jumpstack[cpu]);
- }
+ for_each_possible_cpu(cpu)
+ kvfree(info->jumpstack[cpu]);
+ kvfree(info->jumpstack);
}
- if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
- vfree(info->jumpstack);
- else
- kfree(info->jumpstack);
-
free_percpu(info->stackptr);
kfree(info);
@@ -745,9 +715,7 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
{
struct xt_table *t;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &net->xt.tables[af], list)
if (strcmp(t->name, name) == 0 && try_module_get(t->me))
return t;
@@ -896,10 +864,7 @@ struct xt_table *xt_register_table(struct net *net,
goto out;
}
- ret = mutex_lock_interruptible(&xt[table->af].mutex);
- if (ret != 0)
- goto out_free;
-
+ mutex_lock(&xt[table->af].mutex);
/* Don't autoload: we'd eat our tail... */
list_for_each_entry(t, &net->xt.tables[table->af], list) {
if (strcmp(t->name, table->name) == 0) {
@@ -924,9 +889,8 @@ struct xt_table *xt_register_table(struct net *net,
mutex_unlock(&xt[table->af].mutex);
return table;
- unlock:
+unlock:
mutex_unlock(&xt[table->af].mutex);
-out_free:
kfree(table);
out:
return ERR_PTR(ret);
@@ -1137,22 +1101,11 @@ static const struct seq_operations xt_match_seq_ops = {
static int xt_match_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_match_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
@@ -1201,22 +1154,11 @@ static const struct seq_operations xt_target_seq_ops = {
static int xt_target_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_target_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 73b73f687c58..02afaf48a729 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -126,7 +126,7 @@ hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
hash = hash ^ (t->proto & info->proto_mask);
- return (((u64)hash * info->hmodulus) >> 32) + info->hoffset;
+ return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
}
static void
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 993de2ba89d3..3ba31c194cce 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -50,11 +50,14 @@ struct xt_led_info_internal {
struct timer_list timer;
};
+#define XT_LED_BLINK_DELAY 50 /* ms */
+
static unsigned int
led_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_led_info *ledinfo = par->targinfo;
struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+ unsigned long led_delay = XT_LED_BLINK_DELAY;
/*
* If "always blink" is enabled, and there's still some time until the
@@ -62,9 +65,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
*/
if ((ledinfo->delay > 0) && ledinfo->always_blink &&
timer_pending(&ledinternal->timer))
- led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
-
- led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
+ led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger,
+ &led_delay, &led_delay, 1);
+ else
+ led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
/* If there's a positive delay, start/update the timer */
if (ledinfo->delay > 0) {
@@ -133,9 +137,7 @@ static int led_tg_check(const struct xt_tgchk_param *par)
err = led_trigger_register(&ledinternal->netfilter_led_trigger);
if (err) {
- pr_warning("led_trigger_register() failed\n");
- if (err == -EEXIST)
- pr_warning("Trigger name is already in use.\n");
+ pr_err("Trigger name is already in use.\n");
goto exit_alloc;
}
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 5ab24843370a..c13b79440ede 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -27,806 +27,6 @@
#include <linux/netfilter/xt_LOG.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_log.h>
-#include <net/netfilter/xt_log.h>
-
-static struct nf_loginfo default_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 5,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static int dump_udp_header(struct sbuff *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset)
-{
- struct udphdr _udph;
- const struct udphdr *uh;
-
- if (proto == IPPROTO_UDP)
- /* Max length: 10 "PROTO=UDP " */
- sb_add(m, "PROTO=UDP ");
- else /* Max length: 14 "PROTO=UDPLITE " */
- sb_add(m, "PROTO=UDPLITE ");
-
- if (fragment)
- goto out;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
- if (uh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
-
- return 1;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- sb_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest),
- ntohs(uh->len));
-
-out:
- return 0;
-}
-
-static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset,
- unsigned int logflags)
-{
- struct tcphdr _tcph;
- const struct tcphdr *th;
-
- /* Max length: 10 "PROTO=TCP " */
- sb_add(m, "PROTO=TCP ");
-
- if (fragment)
- return 0;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
- if (th == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
- return 1;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- sb_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest));
- /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
- if (logflags & XT_LOG_TCPSEQ)
- sb_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq));
-
- /* Max length: 13 "WINDOW=65535 " */
- sb_add(m, "WINDOW=%u ", ntohs(th->window));
- /* Max length: 9 "RES=0x3C " */
- sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
- TCP_RESERVED_BITS) >> 22));
- /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
- if (th->cwr)
- sb_add(m, "CWR ");
- if (th->ece)
- sb_add(m, "ECE ");
- if (th->urg)
- sb_add(m, "URG ");
- if (th->ack)
- sb_add(m, "ACK ");
- if (th->psh)
- sb_add(m, "PSH ");
- if (th->rst)
- sb_add(m, "RST ");
- if (th->syn)
- sb_add(m, "SYN ");
- if (th->fin)
- sb_add(m, "FIN ");
- /* Max length: 11 "URGP=65535 " */
- sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
-
- if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
- u_int8_t _opt[60 - sizeof(struct tcphdr)];
- const u_int8_t *op;
- unsigned int i;
- unsigned int optsize = th->doff*4 - sizeof(struct tcphdr);
-
- op = skb_header_pointer(skb, offset + sizeof(struct tcphdr),
- optsize, _opt);
- if (op == NULL) {
- sb_add(m, "OPT (TRUNCATED)");
- return 1;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- sb_add(m, "OPT (");
- for (i = 0; i < optsize; i++)
- sb_add(m, "%02X", op[i]);
-
- sb_add(m, ") ");
- }
-
- return 0;
-}
-
-static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk)
-{
- if (!sk || sk->sk_state == TCP_TIME_WAIT)
- return;
-
- read_lock_bh(&sk->sk_callback_lock);
- if (sk->sk_socket && sk->sk_socket->file) {
- const struct cred *cred = sk->sk_socket->file->f_cred;
- sb_add(m, "UID=%u GID=%u ",
- from_kuid_munged(&init_user_ns, cred->fsuid),
- from_kgid_munged(&init_user_ns, cred->fsgid));
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-/* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb,
- unsigned int iphoff)
-{
- struct iphdr _iph;
- const struct iphdr *ih;
- unsigned int logflags;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
- else
- logflags = NF_LOG_MASK;
-
- ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
- if (ih == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Important fields:
- * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
- /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
- sb_add(m, "SRC=%pI4 DST=%pI4 ",
- &ih->saddr, &ih->daddr);
-
- /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
- sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
- ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
- ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
-
- /* Max length: 6 "CE DF MF " */
- if (ntohs(ih->frag_off) & IP_CE)
- sb_add(m, "CE ");
- if (ntohs(ih->frag_off) & IP_DF)
- sb_add(m, "DF ");
- if (ntohs(ih->frag_off) & IP_MF)
- sb_add(m, "MF ");
-
- /* Max length: 11 "FRAG:65535 " */
- if (ntohs(ih->frag_off) & IP_OFFSET)
- sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
-
- if ((logflags & XT_LOG_IPOPT) &&
- ih->ihl * 4 > sizeof(struct iphdr)) {
- const unsigned char *op;
- unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
- unsigned int i, optsize;
-
- optsize = ih->ihl * 4 - sizeof(struct iphdr);
- op = skb_header_pointer(skb, iphoff+sizeof(_iph),
- optsize, _opt);
- if (op == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- sb_add(m, "OPT (");
- for (i = 0; i < optsize; i++)
- sb_add(m, "%02X", op[i]);
- sb_add(m, ") ");
- }
-
- switch (ih->protocol) {
- case IPPROTO_TCP:
- if (dump_tcp_header(m, skb, ih->protocol,
- ntohs(ih->frag_off) & IP_OFFSET,
- iphoff+ih->ihl*4, logflags))
- return;
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- if (dump_udp_header(m, skb, ih->protocol,
- ntohs(ih->frag_off) & IP_OFFSET,
- iphoff+ih->ihl*4))
- return;
- break;
- case IPPROTO_ICMP: {
- struct icmphdr _icmph;
- const struct icmphdr *ich;
- static const size_t required_len[NR_ICMP_TYPES+1]
- = { [ICMP_ECHOREPLY] = 4,
- [ICMP_DEST_UNREACH]
- = 8 + sizeof(struct iphdr),
- [ICMP_SOURCE_QUENCH]
- = 8 + sizeof(struct iphdr),
- [ICMP_REDIRECT]
- = 8 + sizeof(struct iphdr),
- [ICMP_ECHO] = 4,
- [ICMP_TIME_EXCEEDED]
- = 8 + sizeof(struct iphdr),
- [ICMP_PARAMETERPROB]
- = 8 + sizeof(struct iphdr),
- [ICMP_TIMESTAMP] = 20,
- [ICMP_TIMESTAMPREPLY] = 20,
- [ICMP_ADDRESS] = 12,
- [ICMP_ADDRESSREPLY] = 12 };
-
- /* Max length: 11 "PROTO=ICMP " */
- sb_add(m, "PROTO=ICMP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
- sizeof(_icmph), &_icmph);
- if (ich == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 18 "TYPE=255 CODE=255 " */
- sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (ich->type <= NR_ICMP_TYPES &&
- required_len[ich->type] &&
- skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- switch (ich->type) {
- case ICMP_ECHOREPLY:
- case ICMP_ECHO:
- /* Max length: 19 "ID=65535 SEQ=65535 " */
- sb_add(m, "ID=%u SEQ=%u ",
- ntohs(ich->un.echo.id),
- ntohs(ich->un.echo.sequence));
- break;
-
- case ICMP_PARAMETERPROB:
- /* Max length: 14 "PARAMETER=255 " */
- sb_add(m, "PARAMETER=%u ",
- ntohl(ich->un.gateway) >> 24);
- break;
- case ICMP_REDIRECT:
- /* Max length: 24 "GATEWAY=255.255.255.255 " */
- sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
- /* Fall through */
- case ICMP_DEST_UNREACH:
- case ICMP_SOURCE_QUENCH:
- case ICMP_TIME_EXCEEDED:
- /* Max length: 3+maxlen */
- if (!iphoff) { /* Only recurse once. */
- sb_add(m, "[");
- dump_ipv4_packet(m, info, skb,
- iphoff + ih->ihl*4+sizeof(_icmph));
- sb_add(m, "] ");
- }
-
- /* Max length: 10 "MTU=65535 " */
- if (ich->type == ICMP_DEST_UNREACH &&
- ich->code == ICMP_FRAG_NEEDED)
- sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
- }
- break;
- }
- /* Max Length */
- case IPPROTO_AH: {
- struct ip_auth_hdr _ahdr;
- const struct ip_auth_hdr *ah;
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 9 "PROTO=AH " */
- sb_add(m, "PROTO=AH ");
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_ahdr), &_ahdr);
- if (ah == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
- break;
- }
- case IPPROTO_ESP: {
- struct ip_esp_hdr _esph;
- const struct ip_esp_hdr *eh;
-
- /* Max length: 10 "PROTO=ESP " */
- sb_add(m, "PROTO=ESP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_esph), &_esph);
- if (eh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
- break;
- }
- /* Max length: 10 "PROTO 255 " */
- default:
- sb_add(m, "PROTO=%u ", ih->protocol);
- }
-
- /* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && !iphoff)
- dump_sk_uid_gid(m, skb->sk);
-
- /* Max length: 16 "MARK=0xFFFFFFFF " */
- if (!iphoff && skb->mark)
- sb_add(m, "MARK=0x%x ", skb->mark);
-
- /* Proto Max log string length */
- /* IP: 40+46+6+11+127 = 230 */
- /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
- /* UDP: 10+max(25,20) = 35 */
- /* UDPLITE: 14+max(25,20) = 39 */
- /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
- /* ESP: 10+max(25)+15 = 50 */
- /* AH: 9+max(25)+15 = 49 */
- /* unknown: 10 */
-
- /* (ICMP allows recursion one level deep) */
- /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
- /* maxlen = 230+ 91 + 230 + 252 = 803 */
-}
-
-static void dump_ipv4_mac_header(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & XT_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- sb_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int i;
-
- sb_add(m, "%02x", *p++);
- for (i = 1; i < dev->hard_header_len; i++, p++)
- sb_add(m, ":%02x", *p);
- }
- sb_add(m, " ");
-}
-
-static void
-log_packet_common(struct sbuff *m,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
- '0' + loginfo->u.log.level, prefix,
- in ? in->name : "",
- out ? out->name : "");
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- const struct net_device *physindev;
- const struct net_device *physoutdev;
-
- physindev = skb->nf_bridge->physindev;
- if (physindev && in != physindev)
- sb_add(m, "PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
- if (physoutdev && out != physoutdev)
- sb_add(m, "PHYSOUT=%s ", physoutdev->name);
- }
-#endif
-}
-
-
-static void
-ipt_log_packet(struct net *net,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- struct sbuff *m;
-
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net))
- return;
-
- m = sb_open();
-
- if (!loginfo)
- loginfo = &default_loginfo;
-
- log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix);
-
- if (in != NULL)
- dump_ipv4_mac_header(m, loginfo, skb);
-
- dump_ipv4_packet(m, loginfo, skb, 0);
-
- sb_close(m);
-}
-
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-/* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb, unsigned int ip6hoff,
- int recurse)
-{
- u_int8_t currenthdr;
- int fragment;
- struct ipv6hdr _ip6h;
- const struct ipv6hdr *ih;
- unsigned int ptr;
- unsigned int hdrlen = 0;
- unsigned int logflags;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
- else
- logflags = NF_LOG_MASK;
-
- ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
- if (ih == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
- sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
-
- /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
- sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
- ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
- (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
- ih->hop_limit,
- (ntohl(*(__be32 *)ih) & 0x000fffff));
-
- fragment = 0;
- ptr = ip6hoff + sizeof(struct ipv6hdr);
- currenthdr = ih->nexthdr;
- while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
- struct ipv6_opt_hdr _hdr;
- const struct ipv6_opt_hdr *hp;
-
- hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 48 "OPT (...) " */
- if (logflags & XT_LOG_IPOPT)
- sb_add(m, "OPT ( ");
-
- switch (currenthdr) {
- case IPPROTO_FRAGMENT: {
- struct frag_hdr _fhdr;
- const struct frag_hdr *fh;
-
- sb_add(m, "FRAG:");
- fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
- &_fhdr);
- if (fh == NULL) {
- sb_add(m, "TRUNCATED ");
- return;
- }
-
- /* Max length: 6 "65535 " */
- sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
-
- /* Max length: 11 "INCOMPLETE " */
- if (fh->frag_off & htons(0x0001))
- sb_add(m, "INCOMPLETE ");
-
- sb_add(m, "ID:%08x ", ntohl(fh->identification));
-
- if (ntohs(fh->frag_off) & 0xFFF8)
- fragment = 1;
-
- hdrlen = 8;
-
- break;
- }
- case IPPROTO_DSTOPTS:
- case IPPROTO_ROUTING:
- case IPPROTO_HOPOPTS:
- if (fragment) {
- if (logflags & XT_LOG_IPOPT)
- sb_add(m, ")");
- return;
- }
- hdrlen = ipv6_optlen(hp);
- break;
- /* Max Length */
- case IPPROTO_AH:
- if (logflags & XT_LOG_IPOPT) {
- struct ip_auth_hdr _ahdr;
- const struct ip_auth_hdr *ah;
-
- /* Max length: 3 "AH " */
- sb_add(m, "AH ");
-
- if (fragment) {
- sb_add(m, ")");
- return;
- }
-
- ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
- &_ahdr);
- if (ah == NULL) {
- /*
- * Max length: 26 "INCOMPLETE [65535
- * bytes] )"
- */
- sb_add(m, "INCOMPLETE [%u bytes] )",
- skb->len - ptr);
- return;
- }
-
- /* Length: 15 "SPI=0xF1234567 */
- sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
-
- }
-
- hdrlen = (hp->hdrlen+2)<<2;
- break;
- case IPPROTO_ESP:
- if (logflags & XT_LOG_IPOPT) {
- struct ip_esp_hdr _esph;
- const struct ip_esp_hdr *eh;
-
- /* Max length: 4 "ESP " */
- sb_add(m, "ESP ");
-
- if (fragment) {
- sb_add(m, ")");
- return;
- }
-
- /*
- * Max length: 26 "INCOMPLETE [65535 bytes] )"
- */
- eh = skb_header_pointer(skb, ptr, sizeof(_esph),
- &_esph);
- if (eh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] )",
- skb->len - ptr);
- return;
- }
-
- /* Length: 16 "SPI=0xF1234567 )" */
- sb_add(m, "SPI=0x%x )", ntohl(eh->spi));
-
- }
- return;
- default:
- /* Max length: 20 "Unknown Ext Hdr 255" */
- sb_add(m, "Unknown Ext Hdr %u", currenthdr);
- return;
- }
- if (logflags & XT_LOG_IPOPT)
- sb_add(m, ") ");
-
- currenthdr = hp->nexthdr;
- ptr += hdrlen;
- }
-
- switch (currenthdr) {
- case IPPROTO_TCP:
- if (dump_tcp_header(m, skb, currenthdr, fragment, ptr,
- logflags))
- return;
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- if (dump_udp_header(m, skb, currenthdr, fragment, ptr))
- return;
- break;
- case IPPROTO_ICMPV6: {
- struct icmp6hdr _icmp6h;
- const struct icmp6hdr *ic;
-
- /* Max length: 13 "PROTO=ICMPv6 " */
- sb_add(m, "PROTO=ICMPv6 ");
-
- if (fragment)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
- if (ic == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
- return;
- }
-
- /* Max length: 18 "TYPE=255 CODE=255 " */
- sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
-
- switch (ic->icmp6_type) {
- case ICMPV6_ECHO_REQUEST:
- case ICMPV6_ECHO_REPLY:
- /* Max length: 19 "ID=65535 SEQ=65535 " */
- sb_add(m, "ID=%u SEQ=%u ",
- ntohs(ic->icmp6_identifier),
- ntohs(ic->icmp6_sequence));
- break;
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- break;
-
- case ICMPV6_PARAMPROB:
- /* Max length: 17 "POINTER=ffffffff " */
- sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
- /* Fall through */
- case ICMPV6_DEST_UNREACH:
- case ICMPV6_PKT_TOOBIG:
- case ICMPV6_TIME_EXCEED:
- /* Max length: 3+maxlen */
- if (recurse) {
- sb_add(m, "[");
- dump_ipv6_packet(m, info, skb,
- ptr + sizeof(_icmp6h), 0);
- sb_add(m, "] ");
- }
-
- /* Max length: 10 "MTU=65535 " */
- if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
- sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
- }
- break;
- }
- /* Max length: 10 "PROTO=255 " */
- default:
- sb_add(m, "PROTO=%u ", currenthdr);
- }
-
- /* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && recurse)
- dump_sk_uid_gid(m, skb->sk);
-
- /* Max length: 16 "MARK=0xFFFFFFFF " */
- if (recurse && skb->mark)
- sb_add(m, "MARK=0x%x ", skb->mark);
-}
-
-static void dump_ipv6_mac_header(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & XT_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- sb_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int len = dev->hard_header_len;
- unsigned int i;
-
- if (dev->type == ARPHRD_SIT) {
- p -= ETH_HLEN;
-
- if (p < skb->head)
- p = NULL;
- }
-
- if (p != NULL) {
- sb_add(m, "%02x", *p++);
- for (i = 1; i < len; i++)
- sb_add(m, ":%02x", *p++);
- }
- sb_add(m, " ");
-
- if (dev->type == ARPHRD_SIT) {
- const struct iphdr *iph =
- (struct iphdr *)skb_mac_header(skb);
- sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
- &iph->daddr);
- }
- } else
- sb_add(m, " ");
-}
-
-static void
-ip6t_log_packet(struct net *net,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- struct sbuff *m;
-
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net))
- return;
-
- m = sb_open();
-
- if (!loginfo)
- loginfo = &default_loginfo;
-
- log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix);
-
- if (in != NULL)
- dump_ipv6_mac_header(m, loginfo, skb);
-
- dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
-
- sb_close(m);
-}
-#endif
static unsigned int
log_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -839,17 +39,8 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- if (par->family == NFPROTO_IPV4)
- ipt_log_packet(net, NFPROTO_IPV4, par->hooknum, skb, par->in,
- par->out, &li, loginfo->prefix);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- else if (par->family == NFPROTO_IPV6)
- ip6t_log_packet(net, NFPROTO_IPV6, par->hooknum, skb, par->in,
- par->out, &li, loginfo->prefix);
-#endif
- else
- WARN_ON_ONCE(1);
-
+ nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out,
+ &li, "%s", loginfo->prefix);
return XT_CONTINUE;
}
@@ -870,7 +61,12 @@ static int log_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- return 0;
+ return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+}
+
+static void log_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_logger_put(par->family, NF_LOG_TYPE_LOG);
}
static struct xt_target log_tg_regs[] __read_mostly = {
@@ -880,6 +76,7 @@ static struct xt_target log_tg_regs[] __read_mostly = {
.target = log_tg,
.targetsize = sizeof(struct xt_log_info),
.checkentry = log_tg_check,
+ .destroy = log_tg_destroy,
.me = THIS_MODULE,
},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
@@ -889,78 +86,19 @@ static struct xt_target log_tg_regs[] __read_mostly = {
.target = log_tg,
.targetsize = sizeof(struct xt_log_info),
.checkentry = log_tg_check,
+ .destroy = log_tg_destroy,
.me = THIS_MODULE,
},
#endif
};
-static struct nf_logger ipt_log_logger __read_mostly = {
- .name = "ipt_LOG",
- .logfn = &ipt_log_packet,
- .me = THIS_MODULE,
-};
-
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-static struct nf_logger ip6t_log_logger __read_mostly = {
- .name = "ip6t_LOG",
- .logfn = &ip6t_log_packet,
- .me = THIS_MODULE,
-};
-#endif
-
-static int __net_init log_net_init(struct net *net)
-{
- nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger);
-#endif
- return 0;
-}
-
-static void __net_exit log_net_exit(struct net *net)
-{
- nf_log_unset(net, &ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_unset(net, &ip6t_log_logger);
-#endif
-}
-
-static struct pernet_operations log_net_ops = {
- .init = log_net_init,
- .exit = log_net_exit,
-};
-
static int __init log_tg_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&log_net_ops);
- if (ret < 0)
- goto err_pernet;
-
- ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
- if (ret < 0)
- goto err_target;
-
- nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);
-#endif
- return 0;
-
-err_target:
- unregister_pernet_subsys(&log_net_ops);
-err_pernet:
- return ret;
+ return xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
}
static void __exit log_tg_exit(void)
{
- unregister_pernet_subsys(&log_net_ops);
- nf_log_unregister(&ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_unregister(&ip6t_log_logger);
-#endif
xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
}
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 370adf622cef..604df6fae6fc 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -136,7 +136,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.interval = info->interval;
cfg.est.ewma_log = info->ewma_log;
- ret = gen_new_estimator(&est->bstats, &est->rstats,
+ ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
&est->lock, &cfg.opt);
if (ret < 0)
goto err2;
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index bbffdbdaf603..dffee9d47ec4 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
program.len = info->bpf_program_num_elem;
program.filter = info->bpf_program;
- if (sk_unattached_filter_create(&info->filter, &program)) {
+ if (bpf_prog_create(&info->filter, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
}
@@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- return SK_RUN_FILTER(info->filter, skb);
+ return BPF_PROG_RUN(info->filter, skb);
}
static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- sk_unattached_filter_destroy(info->filter);
+ bpf_prog_destroy(info->filter);
}
static struct xt_match bpf_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index f4e833005320..7198d660b4de 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -31,7 +31,7 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par)
if (info->invert & ~1)
return -EINVAL;
- return info->id ? 0 : -EINVAL;
+ return 0;
}
static bool
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index f4af1bfafb1c..96fa26b20b67 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -55,7 +55,8 @@ xt_cluster_hash(const struct nf_conn *ct,
WARN_ON(1);
break;
}
- return (((u64)hash * info->total_nodes) >> 32);
+
+ return reciprocal_scale(hash, info->total_nodes);
}
static inline bool
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 1e634615ab9d..d4bec261e74e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -120,7 +120,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
* accounting is enabled, so complain in the hope that someone notices.
*/
if (!nf_ct_acct_enabled(par->net)) {
- pr_warning("Forcing CT accounting to be enabled\n");
+ pr_warn("Forcing CT accounting to be enabled\n");
nf_ct_set_acct(par->net, true);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a3910fc2122b..05fbc2a0be46 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -104,7 +104,7 @@ struct xt_hashlimit_htable {
spinlock_t lock; /* lock for list_head */
u_int32_t rnd; /* random seed for hash */
unsigned int count; /* number entries in table */
- struct timer_list timer; /* timer for gc */
+ struct delayed_work gc_work;
/* seq_file stuff */
struct proc_dir_entry *pde;
@@ -135,7 +135,7 @@ hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
* give results between [0 and cfg.size-1] and same hash distribution,
* but using a multiply, less expensive than a divide
*/
- return ((u64)hash * ht->cfg.size) >> 32;
+ return reciprocal_scale(hash, ht->cfg.size);
}
static struct dsthash_ent *
@@ -213,7 +213,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
call_rcu_bh(&ent->rcu, dsthash_free_rcu);
ht->count--;
}
-static void htable_gc(unsigned long htlong);
+static void htable_gc(struct work_struct *work);
static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
u_int8_t family)
@@ -273,9 +273,9 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
}
hinfo->net = net;
- setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
- hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
- add_timer(&hinfo->timer);
+ INIT_DEFERRABLE_WORK(&hinfo->gc_work, htable_gc);
+ queue_delayed_work(system_power_efficient_wq, &hinfo->gc_work,
+ msecs_to_jiffies(hinfo->cfg.gc_interval));
hlist_add_head(&hinfo->node, &hashlimit_net->htables);
@@ -300,29 +300,30 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
{
unsigned int i;
- /* lock hash table and iterate over it */
- spin_lock_bh(&ht->lock);
for (i = 0; i < ht->cfg.size; i++) {
struct dsthash_ent *dh;
struct hlist_node *n;
+
+ spin_lock_bh(&ht->lock);
hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
if ((*select)(ht, dh))
dsthash_free(ht, dh);
}
+ spin_unlock_bh(&ht->lock);
+ cond_resched();
}
- spin_unlock_bh(&ht->lock);
}
-/* hash table garbage collector, run by timer */
-static void htable_gc(unsigned long htlong)
+static void htable_gc(struct work_struct *work)
{
- struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong;
+ struct xt_hashlimit_htable *ht;
+
+ ht = container_of(work, struct xt_hashlimit_htable, gc_work.work);
htable_selective_cleanup(ht, select_gc);
- /* re-add the timer accordingly */
- ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
- add_timer(&ht->timer);
+ queue_delayed_work(system_power_efficient_wq,
+ &ht->gc_work, msecs_to_jiffies(ht->cfg.gc_interval));
}
static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
@@ -341,7 +342,7 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
static void htable_destroy(struct xt_hashlimit_htable *hinfo)
{
- del_timer_sync(&hinfo->timer);
+ cancel_delayed_work_sync(&hinfo->gc_work);
htable_remove_proc_entry(hinfo);
htable_selective_cleanup(hinfo, select_all);
kfree(hinfo->name);
@@ -942,7 +943,7 @@ static int __init hashlimit_mt_init(void)
sizeof(struct dsthash_ent), 0, 0,
NULL);
if (!hashlimit_cachep) {
- pr_warning("unable to create slab cache\n");
+ pr_warn("unable to create slab cache\n");
goto err2;
}
return 0;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index d7ca16b8b8df..f440f57a452f 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -13,6 +13,7 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_physdev.h>
#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/br_netfilter.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
@@ -87,6 +88,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_physdev_info *info = par->matchinfo;
+ br_netfilter_enable();
+
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return -EINVAL;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 80c2e2d603e0..5732cd64acc0 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -84,13 +84,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_warn("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
- pr_warning("Protocol error: set match dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
@@ -134,13 +133,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_warn("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.dim > IPSET_DIM_MAX) {
- pr_warning("Protocol error: set match dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
@@ -230,8 +228,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_warn("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
@@ -239,8 +237,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_warn("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
@@ -248,8 +246,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
}
if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
- pr_warning("Protocol error: SET target dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -303,8 +300,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_warn("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
@@ -312,8 +309,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_warn("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
@@ -321,8 +318,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
}
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX) {
- pr_warning("Protocol error: SET target dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -370,6 +366,140 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
#define set_target_v2_checkentry set_target_v1_checkentry
#define set_target_v2_destroy set_target_v1_destroy
+/* Revision 3 target */
+
+static unsigned int
+set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ ADT_OPT(add_opt, par->family, info->add_set.dim,
+ info->add_set.flags, info->flags, info->timeout);
+ ADT_OPT(del_opt, par->family, info->del_set.dim,
+ info->del_set.flags, 0, UINT_MAX);
+ ADT_OPT(map_opt, par->family, info->map_set.dim,
+ info->map_set.flags, 0, UINT_MAX);
+
+ int ret;
+
+ /* Normalize to fit into jiffies */
+ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+ add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_add(info->add_set.index, skb, par, &add_opt);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_del(info->del_set.index, skb, par, &del_opt);
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
+ IPSET_FLAG_MAP_SKBPRIO |
+ IPSET_FLAG_MAP_SKBQUEUE);
+ ret = match_set(info->map_set.index, skb, par, &map_opt,
+ info->map_set.flags & IPSET_INV_MATCH);
+ if (!ret)
+ return XT_CONTINUE;
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
+ skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
+ ^ (map_opt.ext.skbmark);
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
+ skb->priority = map_opt.ext.skbprio;
+ if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
+ skb->dev &&
+ skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
+ skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+ }
+ return XT_CONTINUE;
+}
+
+
+static int
+set_target_v3_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ ip_set_id_t index;
+
+ if (info->add_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->add_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find add_set index %u as target\n",
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->del_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->del_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find del_set index %u as target\n",
+ info->del_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ if (strncmp(par->table, "mangle", 7)) {
+ pr_warn("--map-set only usable from mangle table\n");
+ return -EINVAL;
+ }
+ if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+ (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+ !(par->hook_mask & (1 << NF_INET_FORWARD |
+ 1 << NF_INET_LOCAL_OUT |
+ 1 << NF_INET_POST_ROUTING))) {
+ pr_warn("mapping of prio or/and queue is allowed only"
+ "from OUTPUT/FORWARD/POSTROUTING chains\n");
+ return -EINVAL;
+ }
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->map_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find map_set index %u as target\n",
+ info->map_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->del_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->add_set.dim > IPSET_DIM_MAX ||
+ info->del_set.dim > IPSET_DIM_MAX ||
+ info->map_set.dim > IPSET_DIM_MAX) {
+ pr_warn("Protocol error: SET target dimension "
+ "is over the limit!\n");
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static void
+set_target_v3_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+}
+
+
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",
@@ -497,6 +627,27 @@ static struct xt_target set_targets[] __read_mostly = {
.destroy = set_target_v2_destroy,
.me = THIS_MODULE
},
+ /* --map-set support */
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
};
static int __init xt_set_init(void)
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index d3c48b14ab94..5699adb97652 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -29,7 +29,6 @@ string_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct ts_state state;
bool invert;
- memset(&state, 0, sizeof(struct ts_state));
invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 05ea4a4cc0ac..a845cd4cf21e 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -170,7 +170,6 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
#endif /* IPv6 */
default:
goto cfg_unlbl_map_add_failure;
- break;
}
entry->def.addrsel = addrmap;
@@ -247,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
* @addr: IP address in network byte order (struct in[6]_addr)
* @mask: address mask in network byte order (struct in[6]_addr)
* @family: address family
- * @secid: LSM secid value for the entry
* @audit_info: NetLabel audit information
*
* Description:
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 1e779bb7fa43..adf8b7900da2 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -71,11 +71,7 @@ int __init netlbl_netlink_init(void)
if (ret_val != 0)
return ret_val;
- ret_val = netlbl_unlabel_genl_init();
- if (ret_val != 0)
- return ret_val;
-
- return 0;
+ return netlbl_unlabel_genl_init();
}
/*
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e6fac7e3db52..c416725d28c4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -58,7 +58,9 @@
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/if_arp.h>
+#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
+#include <linux/hash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -100,6 +102,19 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
+/* Protects netlink socket hash table mutations */
+DEFINE_MUTEX(nl_sk_hash_lock);
+EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
+
+static int lockdep_nl_sk_hash_is_held(void)
+{
+#ifdef CONFIG_LOCKDEP
+ return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1;
+#else
+ return 1;
+#endif
+}
+
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -110,11 +125,6 @@ static inline u32 netlink_group_mask(u32 group)
return group ? 1 << (group - 1) : 0;
}
-static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
-{
- return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
-}
-
int netlink_add_tap(struct netlink_tap *nt)
{
if (unlikely(nt->dev->type != ARPHRD_NETLINK))
@@ -170,7 +180,6 @@ EXPORT_SYMBOL_GPL(netlink_remove_tap);
static bool netlink_filter_tap(const struct sk_buff *skb)
{
struct sock *sk = skb->sk;
- bool pass = false;
/* We take the more conservative approach and
* whitelist socket protocols that may pass.
@@ -184,11 +193,10 @@ static bool netlink_filter_tap(const struct sk_buff *skb)
case NETLINK_FIB_LOOKUP:
case NETLINK_NETFILTER:
case NETLINK_GENERIC:
- pass = true;
- break;
+ return true;
}
- return pass;
+ return false;
}
static int __netlink_deliver_tap_skb(struct sk_buff *skb,
@@ -205,7 +213,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
nskb->protocol = htons((u16) sk->sk_protocol);
nskb->pkt_type = netlink_is_kernel(sk) ?
PACKET_KERNEL : PACKET_USER;
-
+ skb_reset_network_header(nskb);
ret = dev_queue_xmit(nskb);
if (unlikely(ret > 0))
ret = net_xmit_errno(ret);
@@ -376,7 +384,7 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
if ((int)req->nm_block_size <= 0)
return -EINVAL;
- if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
+ if (!PAGE_ALIGNED(req->nm_block_size))
return -EINVAL;
if (req->nm_frame_size < NL_MMAP_HDRLEN)
return -EINVAL;
@@ -985,105 +993,48 @@ netlink_unlock_table(void)
wake_up(&nl_table_wait);
}
-static bool netlink_compare(struct net *net, struct sock *sk)
+struct netlink_compare_arg
{
- return net_eq(sock_net(sk), net);
-}
-
-static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
-{
- struct netlink_table *table = &nl_table[protocol];
- struct nl_portid_hash *hash = &table->hash;
- struct hlist_head *head;
- struct sock *sk;
-
- read_lock(&nl_table_lock);
- head = nl_portid_hashfn(hash, portid);
- sk_for_each(sk, head) {
- if (table->compare(net, sk) &&
- (nlk_sk(sk)->portid == portid)) {
- sock_hold(sk);
- goto found;
- }
- }
- sk = NULL;
-found:
- read_unlock(&nl_table_lock);
- return sk;
-}
+ struct net *net;
+ u32 portid;
+};
-static struct hlist_head *nl_portid_hash_zalloc(size_t size)
+static bool netlink_compare(void *ptr, void *arg)
{
- if (size <= PAGE_SIZE)
- return kzalloc(size, GFP_ATOMIC);
- else
- return (struct hlist_head *)
- __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
- get_order(size));
-}
+ struct netlink_compare_arg *x = arg;
+ struct sock *sk = ptr;
-static void nl_portid_hash_free(struct hlist_head *table, size_t size)
-{
- if (size <= PAGE_SIZE)
- kfree(table);
- else
- free_pages((unsigned long)table, get_order(size));
+ return nlk_sk(sk)->portid == x->portid &&
+ net_eq(sock_net(sk), x->net);
}
-static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
+static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
+ struct net *net)
{
- unsigned int omask, mask, shift;
- size_t osize, size;
- struct hlist_head *otable, *table;
- int i;
-
- omask = mask = hash->mask;
- osize = size = (mask + 1) * sizeof(*table);
- shift = hash->shift;
-
- if (grow) {
- if (++shift > hash->max_shift)
- return 0;
- mask = mask * 2 + 1;
- size *= 2;
- }
-
- table = nl_portid_hash_zalloc(size);
- if (!table)
- return 0;
-
- otable = hash->table;
- hash->table = table;
- hash->mask = mask;
- hash->shift = shift;
- get_random_bytes(&hash->rnd, sizeof(hash->rnd));
+ struct netlink_compare_arg arg = {
+ .net = net,
+ .portid = portid,
+ };
+ u32 hash;
- for (i = 0; i <= omask; i++) {
- struct sock *sk;
- struct hlist_node *tmp;
-
- sk_for_each_safe(sk, tmp, &otable[i])
- __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
- }
+ hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid));
- nl_portid_hash_free(otable, osize);
- hash->rehash_time = jiffies + 10 * 60 * HZ;
- return 1;
+ return rhashtable_lookup_compare(&table->hash, hash,
+ &netlink_compare, &arg);
}
-static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
+static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
- int avg = hash->entries >> hash->shift;
-
- if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
- return 1;
+ struct netlink_table *table = &nl_table[protocol];
+ struct sock *sk;
- if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
- nl_portid_hash_rehash(hash, 0);
- return 1;
- }
+ rcu_read_lock();
+ sk = __netlink_lookup(table, portid, net);
+ if (sk)
+ sock_hold(sk);
+ rcu_read_unlock();
- return 0;
+ return sk;
}
static const struct proto_ops netlink_ops;
@@ -1115,22 +1066,10 @@ netlink_update_listeners(struct sock *sk)
static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
{
struct netlink_table *table = &nl_table[sk->sk_protocol];
- struct nl_portid_hash *hash = &table->hash;
- struct hlist_head *head;
int err = -EADDRINUSE;
- struct sock *osk;
- int len;
- netlink_table_grab();
- head = nl_portid_hashfn(hash, portid);
- len = 0;
- sk_for_each(osk, head) {
- if (table->compare(net, osk) &&
- (nlk_sk(osk)->portid == portid))
- break;
- len++;
- }
- if (osk)
+ mutex_lock(&nl_sk_hash_lock);
+ if (__netlink_lookup(table, portid, net))
goto err;
err = -EBUSY;
@@ -1138,26 +1077,31 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
goto err;
err = -ENOMEM;
- if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
+ if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX))
goto err;
- if (len && nl_portid_hash_dilute(hash, len))
- head = nl_portid_hashfn(hash, portid);
- hash->entries++;
nlk_sk(sk)->portid = portid;
- sk_add_node(sk, head);
+ sock_hold(sk);
+ rhashtable_insert(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL);
err = 0;
-
err:
- netlink_table_ungrab();
+ mutex_unlock(&nl_sk_hash_lock);
return err;
}
static void netlink_remove(struct sock *sk)
{
+ struct netlink_table *table;
+
+ mutex_lock(&nl_sk_hash_lock);
+ table = &nl_table[sk->sk_protocol];
+ if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL)) {
+ WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+ __sock_put(sk);
+ }
+ mutex_unlock(&nl_sk_hash_lock);
+
netlink_table_grab();
- if (sk_del_node_init(sk))
- nl_table[sk->sk_protocol].hash.entries--;
if (nlk_sk(sk)->subscriptions)
__sk_del_bind_node(sk);
netlink_table_ungrab();
@@ -1313,6 +1257,9 @@ static int netlink_release(struct socket *sock)
}
netlink_table_ungrab();
+ /* Wait for readers to complete */
+ synchronize_net();
+
kfree(nlk->groups);
nlk->groups = NULL;
@@ -1328,30 +1275,22 @@ static int netlink_autobind(struct socket *sock)
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct netlink_table *table = &nl_table[sk->sk_protocol];
- struct nl_portid_hash *hash = &table->hash;
- struct hlist_head *head;
- struct sock *osk;
s32 portid = task_tgid_vnr(current);
int err;
static s32 rover = -4097;
retry:
cond_resched();
- netlink_table_grab();
- head = nl_portid_hashfn(hash, portid);
- sk_for_each(osk, head) {
- if (!table->compare(net, osk))
- continue;
- if (nlk_sk(osk)->portid == portid) {
- /* Bind collision, search negative portid values. */
- portid = rover--;
- if (rover > -4097)
- rover = -4097;
- netlink_table_ungrab();
- goto retry;
- }
+ rcu_read_lock();
+ if (__netlink_lookup(table, portid, net)) {
+ /* Bind collision, search negative portid values. */
+ portid = rover--;
+ if (rover > -4097)
+ rover = -4097;
+ rcu_read_unlock();
+ goto retry;
}
- netlink_table_ungrab();
+ rcu_read_unlock();
err = netlink_insert(sk, net, portid);
if (err == -EADDRINUSE)
@@ -1961,25 +1900,25 @@ struct netlink_broadcast_data {
void *tx_data;
};
-static int do_one_broadcast(struct sock *sk,
- struct netlink_broadcast_data *p)
+static void do_one_broadcast(struct sock *sk,
+ struct netlink_broadcast_data *p)
{
struct netlink_sock *nlk = nlk_sk(sk);
int val;
if (p->exclude_sk == sk)
- goto out;
+ return;
if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
!test_bit(p->group - 1, nlk->groups))
- goto out;
+ return;
if (!net_eq(sock_net(sk), p->net))
- goto out;
+ return;
if (p->failure) {
netlink_overrun(sk);
- goto out;
+ return;
}
sock_hold(sk);
@@ -2017,9 +1956,6 @@ static int do_one_broadcast(struct sock *sk,
p->skb2 = NULL;
}
sock_put(sk);
-
-out:
- return 0;
}
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
@@ -2958,14 +2894,18 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
struct nl_seq_iter *iter = seq->private;
int i, j;
+ struct netlink_sock *nlk;
struct sock *s;
loff_t off = 0;
for (i = 0; i < MAX_LINKS; i++) {
- struct nl_portid_hash *hash = &nl_table[i].hash;
+ struct rhashtable *ht = &nl_table[i].hash;
+ const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ for (j = 0; j < tbl->size; j++) {
+ rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) {
+ s = (struct sock *)nlk;
- for (j = 0; j <= hash->mask; j++) {
- sk_for_each(s, &hash->table[j]) {
if (sock_net(s) != seq_file_net(seq))
continue;
if (off == pos) {
@@ -2981,15 +2921,15 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
}
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(nl_table_lock)
+ __acquires(RCU)
{
- read_lock(&nl_table_lock);
+ rcu_read_lock();
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct sock *s;
+ struct netlink_sock *nlk;
struct nl_seq_iter *iter;
struct net *net;
int i, j;
@@ -3001,28 +2941,26 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
net = seq_file_net(seq);
iter = seq->private;
- s = v;
- do {
- s = sk_next(s);
- } while (s && !nl_table[s->sk_protocol].compare(net, s));
- if (s)
- return s;
+ nlk = v;
+
+ rht_for_each_entry_rcu(nlk, nlk->node.next, node)
+ if (net_eq(sock_net((struct sock *)nlk), net))
+ return nlk;
i = iter->link;
j = iter->hash_idx + 1;
do {
- struct nl_portid_hash *hash = &nl_table[i].hash;
-
- for (; j <= hash->mask; j++) {
- s = sk_head(&hash->table[j]);
+ struct rhashtable *ht = &nl_table[i].hash;
+ const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
- while (s && !nl_table[s->sk_protocol].compare(net, s))
- s = sk_next(s);
- if (s) {
- iter->link = i;
- iter->hash_idx = j;
- return s;
+ for (; j < tbl->size; j++) {
+ rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) {
+ if (net_eq(sock_net((struct sock *)nlk), net)) {
+ iter->link = i;
+ iter->hash_idx = j;
+ return nlk;
+ }
}
}
@@ -3033,9 +2971,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void netlink_seq_stop(struct seq_file *seq, void *v)
- __releases(nl_table_lock)
+ __releases(RCU)
{
- read_unlock(&nl_table_lock);
+ rcu_read_unlock();
}
@@ -3173,9 +3111,17 @@ static struct pernet_operations __net_initdata netlink_net_ops = {
static int __init netlink_proto_init(void)
{
int i;
- unsigned long limit;
- unsigned int order;
int err = proto_register(&netlink_proto, 0);
+ struct rhashtable_params ht_params = {
+ .head_offset = offsetof(struct netlink_sock, node),
+ .key_offset = offsetof(struct netlink_sock, portid),
+ .key_len = sizeof(u32), /* portid */
+ .hashfn = arch_fast_hash,
+ .max_shift = 16, /* 64K */
+ .grow_decision = rht_grow_above_75,
+ .shrink_decision = rht_shrink_below_30,
+ .mutex_is_held = lockdep_nl_sk_hash_is_held,
+ };
if (err != 0)
goto out;
@@ -3186,32 +3132,13 @@ static int __init netlink_proto_init(void)
if (!nl_table)
goto panic;
- if (totalram_pages >= (128 * 1024))
- limit = totalram_pages >> (21 - PAGE_SHIFT);
- else
- limit = totalram_pages >> (23 - PAGE_SHIFT);
-
- order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
- limit = (1UL << order) / sizeof(struct hlist_head);
- order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
-
for (i = 0; i < MAX_LINKS; i++) {
- struct nl_portid_hash *hash = &nl_table[i].hash;
-
- hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
- if (!hash->table) {
- while (i-- > 0)
- nl_portid_hash_free(nl_table[i].hash.table,
- 1 * sizeof(*hash->table));
+ if (rhashtable_init(&nl_table[i].hash, &ht_params) < 0) {
+ while (--i > 0)
+ rhashtable_destroy(&nl_table[i].hash);
kfree(nl_table);
goto panic;
}
- hash->max_shift = order;
- hash->shift = 0;
- hash->mask = 0;
- hash->rehash_time = jiffies;
-
- nl_table[i].compare = netlink_compare;
}
INIT_LIST_HEAD(&netlink_tap_all);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 0b59d441f5b6..b20a1731759b 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -1,6 +1,7 @@
#ifndef _AF_NETLINK_H
#define _AF_NETLINK_H
+#include <linux/rhashtable.h>
#include <net/sock.h>
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
@@ -47,6 +48,8 @@ struct netlink_sock {
struct netlink_ring tx_ring;
atomic_t mapped;
#endif /* CONFIG_NETLINK_MMAP */
+
+ struct rhash_head node;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
@@ -54,21 +57,8 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
-struct nl_portid_hash {
- struct hlist_head *table;
- unsigned long rehash_time;
-
- unsigned int mask;
- unsigned int shift;
-
- unsigned int entries;
- unsigned int max_shift;
-
- u32 rnd;
-};
-
struct netlink_table {
- struct nl_portid_hash hash;
+ struct rhashtable hash;
struct hlist_head mc_list;
struct listeners __rcu *listeners;
unsigned int flags;
@@ -83,5 +73,6 @@ struct netlink_table {
extern struct netlink_table *nl_table;
extern rwlock_t nl_table_lock;
+extern struct mutex nl_sk_hash_lock;
#endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 1af29624b92f..de8c74a3c061 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -4,6 +4,7 @@
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/netlink_diag.h>
+#include <linux/rhashtable.h>
#include "af_netlink.h"
@@ -101,16 +102,20 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
int protocol, int s_num)
{
struct netlink_table *tbl = &nl_table[protocol];
- struct nl_portid_hash *hash = &tbl->hash;
+ struct rhashtable *ht = &tbl->hash;
+ const struct bucket_table *htbl = rht_dereference(ht->tbl, ht);
struct net *net = sock_net(skb->sk);
struct netlink_diag_req *req;
+ struct netlink_sock *nlsk;
struct sock *sk;
int ret = 0, num = 0, i;
req = nlmsg_data(cb->nlh);
- for (i = 0; i <= hash->mask; i++) {
- sk_for_each(sk, &hash->table[i]) {
+ for (i = 0; i < htbl->size; i++) {
+ rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) {
+ sk = (struct sock *)nlsk;
+
if (!net_eq(sock_net(sk), net))
continue;
if (num < s_num) {
@@ -165,6 +170,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
req = nlmsg_data(cb->nlh);
+ mutex_lock(&nl_sk_hash_lock);
read_lock(&nl_table_lock);
if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
@@ -178,6 +184,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
} else {
if (req->sdiag_protocol >= MAX_LINKS) {
read_unlock(&nl_table_lock);
+ mutex_unlock(&nl_sk_hash_lock);
return -ENOENT;
}
@@ -185,6 +192,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
read_unlock(&nl_table_lock);
+ mutex_unlock(&nl_sk_hash_lock);
return skb->len;
}
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ede50d197e10..71cf1bffea06 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1418,7 +1418,7 @@ static int __init nr_proto_init(void)
struct net_device *dev;
sprintf(name, "nr%d", i);
- dev = alloc_netdev(0, name, nr_setup);
+ dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup);
if (!dev) {
printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
goto fail;
diff --git a/net/nfc/digital.h b/net/nfc/digital.h
index 71ad7eefddd4..3c39c72eb038 100644
--- a/net/nfc/digital.h
+++ b/net/nfc/digital.h
@@ -29,6 +29,7 @@
#define DIGITAL_CMD_TG_SEND 1
#define DIGITAL_CMD_TG_LISTEN 2
#define DIGITAL_CMD_TG_LISTEN_MDAA 3
+#define DIGITAL_CMD_TG_LISTEN_MD 4
#define DIGITAL_MAX_HEADER_LEN 7
#define DIGITAL_CRC_LEN 2
@@ -121,6 +122,8 @@ int digital_tg_send_dep_res(struct nfc_digital_dev *ddev, struct sk_buff *skb);
int digital_tg_listen_nfca(struct nfc_digital_dev *ddev, u8 rf_tech);
int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech);
+void digital_tg_recv_md_req(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp);
typedef u16 (*crc_func_t)(u16, const u8 *, size_t);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index a6ce3c627e4e..009bcf317101 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -201,6 +201,11 @@ static void digital_wq_cmd(struct work_struct *work)
digital_send_cmd_complete, cmd);
break;
+ case DIGITAL_CMD_TG_LISTEN_MD:
+ rc = ddev->ops->tg_listen_md(ddev, cmd->timeout,
+ digital_send_cmd_complete, cmd);
+ break;
+
default:
pr_err("Unknown cmd type %d\n", cmd->type);
return;
@@ -293,12 +298,19 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
500, digital_tg_recv_atr_req, NULL);
}
+static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech)
+{
+ return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MD, NULL, NULL, 500,
+ digital_tg_recv_md_req, NULL);
+}
+
int digital_target_found(struct nfc_digital_dev *ddev,
struct nfc_target *target, u8 protocol)
{
int rc;
u8 framing;
u8 rf_tech;
+ u8 poll_tech_count;
int (*check_crc)(struct sk_buff *skb);
void (*add_crc)(struct sk_buff *skb);
@@ -375,12 +387,16 @@ int digital_target_found(struct nfc_digital_dev *ddev,
return rc;
target->supported_protocols = (1 << protocol);
- rc = nfc_targets_found(ddev->nfc_dev, target, 1);
- if (rc)
- return rc;
+ poll_tech_count = ddev->poll_tech_count;
ddev->poll_tech_count = 0;
+ rc = nfc_targets_found(ddev->nfc_dev, target, 1);
+ if (rc) {
+ ddev->poll_tech_count = poll_tech_count;
+ return rc;
+ }
+
return 0;
}
@@ -505,6 +521,9 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols,
if (ddev->ops->tg_listen_mdaa) {
digital_add_poll_tech(ddev, 0,
digital_tg_listen_mdaa);
+ } else if (ddev->ops->tg_listen_md) {
+ digital_add_poll_tech(ddev, 0,
+ digital_tg_listen_md);
} else {
digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A,
digital_tg_listen_nfca);
@@ -732,7 +751,7 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
if (!ops->in_configure_hw || !ops->in_send_cmd || !ops->tg_listen ||
!ops->tg_configure_hw || !ops->tg_send_cmd || !ops->abort_cmd ||
- !ops->switch_rf)
+ !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech))
return NULL;
ddev = kzalloc(sizeof(struct nfc_digital_dev), GFP_KERNEL);
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 171cb9949ab5..b60aa35c074f 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -33,6 +33,8 @@
#define DIGITAL_ATR_REQ_MAX_SIZE 64
#define DIGITAL_LR_BITS_PAYLOAD_SIZE_254B 0x30
+#define DIGITAL_FSL_BITS_PAYLOAD_SIZE_254B \
+ (DIGITAL_LR_BITS_PAYLOAD_SIZE_254B >> 4)
#define DIGITAL_GB_BIT 0x02
#define DIGITAL_NFC_DEP_PFB_TYPE(pfb) ((pfb) & 0xE0)
@@ -127,6 +129,98 @@ static int digital_skb_pull_dep_sod(struct nfc_digital_dev *ddev,
return 0;
}
+static void digital_in_recv_psl_res(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp)
+{
+ struct nfc_target *target = arg;
+ struct digital_psl_res *psl_res;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ rc = PTR_ERR(resp);
+ resp = NULL;
+ goto exit;
+ }
+
+ rc = ddev->skb_check_crc(resp);
+ if (rc) {
+ PROTOCOL_ERR("14.4.1.6");
+ goto exit;
+ }
+
+ rc = digital_skb_pull_dep_sod(ddev, resp);
+ if (rc) {
+ PROTOCOL_ERR("14.4.1.2");
+ goto exit;
+ }
+
+ psl_res = (struct digital_psl_res *)resp->data;
+
+ if ((resp->len != sizeof(*psl_res)) ||
+ (psl_res->dir != DIGITAL_NFC_DEP_FRAME_DIR_IN) ||
+ (psl_res->cmd != DIGITAL_CMD_PSL_RES)) {
+ rc = -EIO;
+ goto exit;
+ }
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
+ NFC_DIGITAL_RF_TECH_424F);
+ if (rc)
+ goto exit;
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCF_NFC_DEP);
+ if (rc)
+ goto exit;
+
+ if (!DIGITAL_DRV_CAPS_IN_CRC(ddev) &&
+ (ddev->curr_rf_tech == NFC_DIGITAL_RF_TECH_106A)) {
+ ddev->skb_add_crc = digital_skb_add_crc_f;
+ ddev->skb_check_crc = digital_skb_check_crc_f;
+ }
+
+ ddev->curr_rf_tech = NFC_DIGITAL_RF_TECH_424F;
+
+ nfc_dep_link_is_up(ddev->nfc_dev, target->idx, NFC_COMM_ACTIVE,
+ NFC_RF_INITIATOR);
+
+ ddev->curr_nfc_dep_pni = 0;
+
+exit:
+ dev_kfree_skb(resp);
+
+ if (rc)
+ ddev->curr_protocol = 0;
+}
+
+static int digital_in_send_psl_req(struct nfc_digital_dev *ddev,
+ struct nfc_target *target)
+{
+ struct sk_buff *skb;
+ struct digital_psl_req *psl_req;
+
+ skb = digital_skb_alloc(ddev, sizeof(*psl_req));
+ if (!skb)
+ return -ENOMEM;
+
+ skb_put(skb, sizeof(*psl_req));
+
+ psl_req = (struct digital_psl_req *)skb->data;
+
+ psl_req->dir = DIGITAL_NFC_DEP_FRAME_DIR_OUT;
+ psl_req->cmd = DIGITAL_CMD_PSL_REQ;
+ psl_req->did = 0;
+ psl_req->brs = (0x2 << 3) | 0x2; /* 424F both directions */
+ psl_req->fsl = DIGITAL_FSL_BITS_PAYLOAD_SIZE_254B;
+
+ digital_skb_push_dep_sod(ddev, skb);
+
+ ddev->skb_add_crc(skb);
+
+ return digital_in_send_cmd(ddev, skb, 500, digital_in_recv_psl_res,
+ target);
+}
+
static void digital_in_recv_atr_res(struct nfc_digital_dev *ddev, void *arg,
struct sk_buff *resp)
{
@@ -166,6 +260,13 @@ static void digital_in_recv_atr_res(struct nfc_digital_dev *ddev, void *arg,
if (rc)
goto exit;
+ if ((ddev->protocols & NFC_PROTO_FELICA_MASK) &&
+ (ddev->curr_rf_tech != NFC_DIGITAL_RF_TECH_424F)) {
+ rc = digital_in_send_psl_req(ddev, target);
+ if (!rc)
+ goto exit;
+ }
+
rc = nfc_dep_link_is_up(ddev->nfc_dev, target->idx, NFC_COMM_ACTIVE,
NFC_RF_INITIATOR);
@@ -457,12 +558,10 @@ static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg,
pr_err("Received a ACK/NACK PDU\n");
rc = -EINVAL;
goto exit;
- break;
case DIGITAL_NFC_DEP_PFB_SUPERVISOR_PDU:
pr_err("Received a SUPERVISOR PDU\n");
rc = -EINVAL;
goto exit;
- break;
}
skb_pull(resp, size);
@@ -673,6 +772,7 @@ void digital_tg_recv_atr_req(struct nfc_digital_dev *ddev, void *arg,
int rc;
struct digital_atr_req *atr_req;
size_t gb_len, min_size;
+ u8 poll_tech_count;
if (IS_ERR(resp)) {
rc = PTR_ERR(resp);
@@ -730,12 +830,16 @@ void digital_tg_recv_atr_req(struct nfc_digital_dev *ddev, void *arg,
goto exit;
gb_len = resp->len - sizeof(struct digital_atr_req);
+
+ poll_tech_count = ddev->poll_tech_count;
+ ddev->poll_tech_count = 0;
+
rc = nfc_tm_activated(ddev->nfc_dev, NFC_PROTO_NFC_DEP_MASK,
NFC_COMM_PASSIVE, atr_req->gb, gb_len);
- if (rc)
+ if (rc) {
+ ddev->poll_tech_count = poll_tech_count;
goto exit;
-
- ddev->poll_tech_count = 0;
+ }
rc = 0;
exit:
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index c2c1c0189b7c..fb58ed2dd41d 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -318,6 +318,8 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
if (DIGITAL_SEL_RES_IS_T2T(sel_res)) {
nfc_proto = NFC_PROTO_MIFARE;
+ } else if (DIGITAL_SEL_RES_IS_NFC_DEP(sel_res)) {
+ nfc_proto = NFC_PROTO_NFC_DEP;
} else if (DIGITAL_SEL_RES_IS_T4T(sel_res)) {
rc = digital_in_send_rats(ddev, target);
if (rc)
@@ -327,8 +329,6 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
* done when receiving the ATS
*/
goto exit_free_skb;
- } else if (DIGITAL_SEL_RES_IS_NFC_DEP(sel_res)) {
- nfc_proto = NFC_PROTO_NFC_DEP;
} else {
rc = -EOPNOTSUPP;
goto exit;
@@ -944,6 +944,13 @@ static int digital_tg_send_sel_res(struct nfc_digital_dev *ddev)
if (!DIGITAL_DRV_CAPS_TG_CRC(ddev))
digital_skb_add_crc_a(skb);
+ rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCA_ANTICOL_COMPLETE);
+ if (rc) {
+ kfree_skb(skb);
+ return rc;
+ }
+
rc = digital_tg_send_cmd(ddev, skb, 300, digital_tg_recv_atr_req,
NULL);
if (rc)
@@ -1002,6 +1009,13 @@ static int digital_tg_send_sdd_res(struct nfc_digital_dev *ddev)
for (i = 0; i < 4; i++)
sdd_res->bcc ^= sdd_res->nfcid1[i];
+ rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCA_STANDARD_WITH_CRC_A);
+ if (rc) {
+ kfree_skb(skb);
+ return rc;
+ }
+
rc = digital_tg_send_cmd(ddev, skb, 300, digital_tg_recv_sel_req,
NULL);
if (rc)
@@ -1054,6 +1068,13 @@ static int digital_tg_send_sens_res(struct nfc_digital_dev *ddev)
sens_res[0] = (DIGITAL_SENS_RES_NFC_DEP >> 8) & 0xFF;
sens_res[1] = DIGITAL_SENS_RES_NFC_DEP & 0xFF;
+ rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCA_STANDARD);
+ if (rc) {
+ kfree_skb(skb);
+ return rc;
+ }
+
rc = digital_tg_send_cmd(ddev, skb, 300, digital_tg_recv_sdd_req,
NULL);
if (rc)
@@ -1197,33 +1218,48 @@ exit:
dev_kfree_skb(resp);
}
-int digital_tg_listen_nfca(struct nfc_digital_dev *ddev, u8 rf_tech)
+static int digital_tg_config_nfca(struct nfc_digital_dev *ddev)
{
int rc;
- rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, rf_tech);
+ rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
+ NFC_DIGITAL_RF_TECH_106A);
if (rc)
return rc;
- rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
- NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
+ return digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
+}
+
+int digital_tg_listen_nfca(struct nfc_digital_dev *ddev, u8 rf_tech)
+{
+ int rc;
+
+ rc = digital_tg_config_nfca(ddev);
if (rc)
return rc;
return digital_tg_listen(ddev, 300, digital_tg_recv_sens_req, NULL);
}
-int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech)
+static int digital_tg_config_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech)
{
int rc;
- u8 *nfcid2;
rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, rf_tech);
if (rc)
return rc;
- rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
- NFC_DIGITAL_FRAMING_NFCF_NFC_DEP);
+ return digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCF_NFC_DEP);
+}
+
+int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech)
+{
+ int rc;
+ u8 *nfcid2;
+
+ rc = digital_tg_config_nfcf(ddev, rf_tech);
if (rc)
return rc;
@@ -1237,3 +1273,43 @@ int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech)
return digital_tg_listen(ddev, 300, digital_tg_recv_sensf_req, nfcid2);
}
+
+void digital_tg_recv_md_req(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp)
+{
+ u8 rf_tech;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ resp = NULL;
+ goto exit_free_skb;
+ }
+
+ rc = ddev->ops->tg_get_rf_tech(ddev, &rf_tech);
+ if (rc)
+ goto exit_free_skb;
+
+ switch (rf_tech) {
+ case NFC_DIGITAL_RF_TECH_106A:
+ rc = digital_tg_config_nfca(ddev);
+ if (rc)
+ goto exit_free_skb;
+ digital_tg_recv_sens_req(ddev, arg, resp);
+ break;
+ case NFC_DIGITAL_RF_TECH_212F:
+ case NFC_DIGITAL_RF_TECH_424F:
+ rc = digital_tg_config_nfcf(ddev, rf_tech);
+ if (rc)
+ goto exit_free_skb;
+ digital_tg_recv_sensf_req(ddev, arg, resp);
+ break;
+ default:
+ goto exit_free_skb;
+ }
+
+ return;
+
+exit_free_skb:
+ digital_poll_next_tech(ddev);
+ dev_kfree_skb(resp);
+}
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 47403705197e..117708263ced 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -553,8 +553,11 @@ static void hci_stop_poll(struct nfc_dev *nfc_dev)
{
struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
- nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
- NFC_HCI_EVT_END_OPERATION, NULL, 0);
+ if (hdev->ops->stop_poll)
+ hdev->ops->stop_poll(hdev);
+ else
+ nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+ NFC_HCI_EVT_END_OPERATION, NULL, 0);
}
static int hci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 2b400e1a8695..90b16cb40058 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -231,6 +231,14 @@ static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
cmd.num_disc_configs++;
}
+ if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) &&
+ (protocols & NFC_PROTO_ISO15693_MASK)) {
+ cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode =
+ NCI_NFC_V_PASSIVE_POLL_MODE;
+ cmd.disc_configs[cmd.num_disc_configs].frequency = 1;
+ cmd.num_disc_configs++;
+ }
+
nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_CMD,
(1 + (cmd.num_disc_configs * sizeof(struct disc_config))),
&cmd);
@@ -751,10 +759,6 @@ int nci_register_device(struct nci_dev *ndev)
struct device *dev = &ndev->nfc_dev->dev;
char name[32];
- rc = nfc_register_device(ndev->nfc_dev);
- if (rc)
- goto exit;
-
ndev->flags = 0;
INIT_WORK(&ndev->cmd_work, nci_cmd_work);
@@ -762,7 +766,7 @@ int nci_register_device(struct nci_dev *ndev)
ndev->cmd_wq = create_singlethread_workqueue(name);
if (!ndev->cmd_wq) {
rc = -ENOMEM;
- goto unreg_exit;
+ goto exit;
}
INIT_WORK(&ndev->rx_work, nci_rx_work);
@@ -792,6 +796,10 @@ int nci_register_device(struct nci_dev *ndev)
mutex_init(&ndev->req_lock);
+ rc = nfc_register_device(ndev->nfc_dev);
+ if (rc)
+ goto destroy_rx_wq_exit;
+
goto exit;
destroy_rx_wq_exit:
@@ -800,9 +808,6 @@ destroy_rx_wq_exit:
destroy_cmd_wq_exit:
destroy_workqueue(ndev->cmd_wq);
-unreg_exit:
- nfc_unregister_device(ndev->nfc_dev);
-
exit:
return rc;
}
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 6c3aef852876..427ef2c7ab68 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -241,9 +241,12 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb)
/* strip the nci data header */
skb_pull(skb, NCI_DATA_HDR_SIZE);
- if (ndev->target_active_prot == NFC_PROTO_MIFARE) {
+ if (ndev->target_active_prot == NFC_PROTO_MIFARE ||
+ ndev->target_active_prot == NFC_PROTO_JEWEL ||
+ ndev->target_active_prot == NFC_PROTO_FELICA ||
+ ndev->target_active_prot == NFC_PROTO_ISO15693) {
/* frame I/F => remove the status byte */
- pr_debug("NFC_PROTO_MIFARE => remove the status byte\n");
+ pr_debug("frame I/F => remove the status byte\n");
skb_trim(skb, (skb->len - 1));
}
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index f8f6af231381..205b35f666db 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -2,6 +2,7 @@
* The NFC Controller Interface is the communication protocol between an
* NFC Controller (NFCC) and a Device Host (DH).
*
+ * Copyright (C) 2014 Marvell International Ltd.
* Copyright (C) 2011 Texas Instruments, Inc.
*
* Written by Ilan Elias <ilane@ti.com>
@@ -155,6 +156,24 @@ static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
return data;
}
+static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
+ __u8 *data)
+{
+ ++data;
+ nfcv_poll->dsfid = *data++;
+ memcpy(nfcv_poll->uid, data, NFC_ISO15693_UID_MAXSIZE);
+ data += NFC_ISO15693_UID_MAXSIZE;
+ return data;
+}
+
+__u32 nci_get_prop_rf_protocol(struct nci_dev *ndev, __u8 rf_protocol)
+{
+ if (ndev->ops->get_rfprotocol)
+ return ndev->ops->get_rfprotocol(ndev, rf_protocol);
+ return 0;
+}
+
static int nci_add_new_protocol(struct nci_dev *ndev,
struct nfc_target *target,
__u8 rf_protocol,
@@ -164,9 +183,12 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
struct rf_tech_specific_params_nfca_poll *nfca_poll;
struct rf_tech_specific_params_nfcb_poll *nfcb_poll;
struct rf_tech_specific_params_nfcf_poll *nfcf_poll;
+ struct rf_tech_specific_params_nfcv_poll *nfcv_poll;
__u32 protocol;
- if (rf_protocol == NCI_RF_PROTOCOL_T2T)
+ if (rf_protocol == NCI_RF_PROTOCOL_T1T)
+ protocol = NFC_PROTO_JEWEL_MASK;
+ else if (rf_protocol == NCI_RF_PROTOCOL_T2T)
protocol = NFC_PROTO_MIFARE_MASK;
else if (rf_protocol == NCI_RF_PROTOCOL_ISO_DEP)
if (rf_tech_and_mode == NCI_NFC_A_PASSIVE_POLL_MODE)
@@ -177,8 +199,10 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
protocol = NFC_PROTO_FELICA_MASK;
else if (rf_protocol == NCI_RF_PROTOCOL_NFC_DEP)
protocol = NFC_PROTO_NFC_DEP_MASK;
+ else if (rf_protocol == NCI_RF_PROTOCOL_T5T)
+ protocol = NFC_PROTO_ISO15693_MASK;
else
- protocol = 0;
+ protocol = nci_get_prop_rf_protocol(ndev, rf_protocol);
if (!(protocol & ndev->poll_prots)) {
pr_err("the target found does not have the desired protocol\n");
@@ -211,6 +235,12 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
memcpy(target->sensf_res, nfcf_poll->sensf_res,
target->sensf_res_len);
}
+ } else if (rf_tech_and_mode == NCI_NFC_V_PASSIVE_POLL_MODE) {
+ nfcv_poll = (struct rf_tech_specific_params_nfcv_poll *)params;
+
+ target->is_iso15693 = 1;
+ target->iso15693_dsfid = nfcv_poll->dsfid;
+ memcpy(target->iso15693_uid, nfcv_poll->uid, NFC_ISO15693_UID_MAXSIZE);
} else {
pr_err("unsupported rf_tech_and_mode 0x%x\n", rf_tech_and_mode);
return -EPROTO;
@@ -303,6 +333,11 @@ static void nci_rf_discover_ntf_packet(struct nci_dev *ndev,
&(ntf.rf_tech_specific_params.nfcf_poll), data);
break;
+ case NCI_NFC_V_PASSIVE_POLL_MODE:
+ data = nci_extract_rf_params_nfcv_passive_poll(ndev,
+ &(ntf.rf_tech_specific_params.nfcv_poll), data);
+ break;
+
default:
pr_err("unsupported rf_tech_and_mode 0x%x\n",
ntf.rf_tech_and_mode);
@@ -453,6 +488,11 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
&(ntf.rf_tech_specific_params.nfcf_poll), data);
break;
+ case NCI_NFC_V_PASSIVE_POLL_MODE:
+ data = nci_extract_rf_params_nfcv_passive_poll(ndev,
+ &(ntf.rf_tech_specific_params.nfcv_poll), data);
+ break;
+
default:
pr_err("unsupported activation_rf_tech_and_mode 0x%x\n",
ntf.activation_rf_tech_and_mode);
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 6ecf491ad509..ba3bb8203b99 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -54,3 +54,14 @@ config OPENVSWITCH_VXLAN
Say N to exclude this support and reduce the binary size.
If unsure, say Y.
+
+config OPENVSWITCH_GENEVE
+ bool "Open vSwitch Geneve tunneling support"
+ depends on INET
+ depends on OPENVSWITCH
+ depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m)
+ default y
+ ---help---
+ If you say Y here, then the Open vSwitch will be able create geneve vport.
+
+ Say N to exclude this support and reduce the binary size.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 3591cb5dae91..9a33a273c375 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,6 +15,10 @@ openvswitch-y := \
vport-internal_dev.o \
vport-netdev.o
+ifneq ($(CONFIG_OPENVSWITCH_GENEVE),)
+openvswitch-y += vport-geneve.o
+endif
+
ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
openvswitch-y += vport-vxlan.o
endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e70d8b18e962..006886dbee36 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -35,13 +35,83 @@
#include <net/sctp/checksum.h>
#include "datapath.h"
+#include "flow.h"
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- const struct nlattr *attr, int len, bool keep_skb);
+ struct sw_flow_key *key,
+ const struct nlattr *attr, int len);
+
+struct deferred_action {
+ struct sk_buff *skb;
+ const struct nlattr *actions;
+
+ /* Store pkt_key clone when creating deferred action. */
+ struct sw_flow_key pkt_key;
+};
+
+#define DEFERRED_ACTION_FIFO_SIZE 10
+struct action_fifo {
+ int head;
+ int tail;
+ /* Deferred action fifo queue storage. */
+ struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
+};
+
+static struct action_fifo __percpu *action_fifos;
+static DEFINE_PER_CPU(int, exec_actions_level);
+
+static void action_fifo_init(struct action_fifo *fifo)
+{
+ fifo->head = 0;
+ fifo->tail = 0;
+}
+
+static bool action_fifo_is_empty(struct action_fifo *fifo)
+{
+ return (fifo->head == fifo->tail);
+}
+
+static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
+{
+ if (action_fifo_is_empty(fifo))
+ return NULL;
+
+ return &fifo->fifo[fifo->tail++];
+}
+
+static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
+{
+ if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
+ return NULL;
+
+ return &fifo->fifo[fifo->head++];
+}
+
+/* Return true if fifo is not full */
+static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct nlattr *attr)
+{
+ struct action_fifo *fifo;
+ struct deferred_action *da;
+
+ fifo = this_cpu_ptr(action_fifos);
+ da = action_fifo_put(fifo);
+ if (da) {
+ da->skb = skb;
+ da->actions = attr;
+ da->pkt_key = *key;
+ }
+
+ return da;
+}
static int make_writable(struct sk_buff *skb, int write_len)
{
+ if (!pskb_may_pull(skb, write_len))
+ return -ENOMEM;
+
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
@@ -70,6 +140,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN;
+ if (skb_network_offset(skb) < ETH_HLEN)
+ skb_set_network_header(skb, ETH_HLEN);
skb_reset_mac_len(skb);
return 0;
@@ -405,16 +477,14 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
- const struct nlattr *attr)
+ struct sw_flow_key *key, const struct nlattr *attr)
{
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
- BUG_ON(!OVS_CB(skb)->pkt_key);
-
upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.key = OVS_CB(skb)->pkt_key;
+ upcall.key = key;
upcall.userdata = NULL;
upcall.portid = 0;
@@ -434,8 +504,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, &upcall);
}
+static bool last_action(const struct nlattr *a, int rem)
+{
+ return a->nla_len == rem;
+}
+
static int sample(struct datapath *dp, struct sk_buff *skb,
- const struct nlattr *attr)
+ struct sw_flow_key *key, const struct nlattr *attr)
{
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
@@ -455,8 +530,50 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
}
}
- return do_execute_actions(dp, skb, nla_data(acts_list),
- nla_len(acts_list), true);
+ rem = nla_len(acts_list);
+ a = nla_data(acts_list);
+
+ /* Actions list is empty, do nothing */
+ if (unlikely(!rem))
+ return 0;
+
+ /* The only known usage of sample action is having a single user-space
+ * action. Treat this usage as a special case.
+ * The output_userspace() should clone the skb to be sent to the
+ * user space. This skb will be consumed by its caller.
+ */
+ if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
+ last_action(a, rem)))
+ return output_userspace(dp, skb, key, a);
+
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (!skb)
+ /* Skip the sample action when out of memory. */
+ return 0;
+
+ if (!add_deferred_actions(skb, key, a)) {
+ if (net_ratelimit())
+ pr_warn("%s: deferred actions limit reached, dropping sample action\n",
+ ovs_dp_name(dp));
+
+ kfree_skb(skb);
+ }
+ return 0;
+}
+
+static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct nlattr *attr)
+{
+ struct ovs_action_hash *hash_act = nla_data(attr);
+ u32 hash = 0;
+
+ /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
+ hash = skb_get_hash(skb);
+ hash = jhash_1word(hash, hash_act->hash_basis);
+ if (!hash)
+ hash = 0x1;
+
+ key->ovs_flow_hash = hash;
}
static int execute_set_action(struct sk_buff *skb,
@@ -473,8 +590,8 @@ static int execute_set_action(struct sk_buff *skb,
skb->mark = nla_get_u32(nested_attr);
break;
- case OVS_KEY_ATTR_IPV4_TUNNEL:
- OVS_CB(skb)->tun_key = nla_data(nested_attr);
+ case OVS_KEY_ATTR_TUNNEL_INFO:
+ OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
break;
case OVS_KEY_ATTR_ETHERNET:
@@ -505,9 +622,48 @@ static int execute_set_action(struct sk_buff *skb,
return err;
}
+static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct nlattr *a, int rem)
+{
+ struct deferred_action *da;
+ int err;
+
+ err = ovs_flow_key_update(skb, key);
+ if (err)
+ return err;
+
+ if (!last_action(a, rem)) {
+ /* Recirc action is the not the last action
+ * of the action list, need to clone the skb.
+ */
+ skb = skb_clone(skb, GFP_ATOMIC);
+
+ /* Skip the recirc action when out of memory, but
+ * continue on with the rest of the action list.
+ */
+ if (!skb)
+ return 0;
+ }
+
+ da = add_deferred_actions(skb, key, NULL);
+ if (da) {
+ da->pkt_key.recirc_id = nla_get_u32(a);
+ } else {
+ kfree_skb(skb);
+
+ if (net_ratelimit())
+ pr_warn("%s: deferred action limit reached, drop recirc action\n",
+ ovs_dp_name(dp));
+ }
+
+ return 0;
+}
+
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- const struct nlattr *attr, int len, bool keep_skb)
+ struct sw_flow_key *key,
+ const struct nlattr *attr, int len)
{
/* Every output action needs a separate clone of 'skb', but the common
* case is just a single output action, so that doing a clone and
@@ -532,7 +688,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_ACTION_ATTR_USERSPACE:
- output_userspace(dp, skb, a);
+ output_userspace(dp, skb, key, a);
+ break;
+
+ case OVS_ACTION_ATTR_HASH:
+ execute_hash(skb, key, a);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -545,12 +705,23 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb);
break;
+ case OVS_ACTION_ATTR_RECIRC:
+ err = execute_recirc(dp, skb, key, a, rem);
+ if (last_action(a, rem)) {
+ /* If this is the last action, the skb has
+ * been consumed or freed.
+ * Return immediately.
+ */
+ return err;
+ }
+ break;
+
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, nla_data(a));
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = sample(dp, skb, a);
+ err = sample(dp, skb, key, a);
if (unlikely(err)) /* skb already freed. */
return err;
break;
@@ -562,23 +733,72 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
}
}
- if (prev_port != -1) {
- if (keep_skb)
- skb = skb_clone(skb, GFP_ATOMIC);
-
+ if (prev_port != -1)
do_output(dp, skb, prev_port);
- } else if (!keep_skb)
+ else
consume_skb(skb);
return 0;
}
+static void process_deferred_actions(struct datapath *dp)
+{
+ struct action_fifo *fifo = this_cpu_ptr(action_fifos);
+
+ /* Do not touch the FIFO in case there is no deferred actions. */
+ if (action_fifo_is_empty(fifo))
+ return;
+
+ /* Finishing executing all deferred actions. */
+ do {
+ struct deferred_action *da = action_fifo_get(fifo);
+ struct sk_buff *skb = da->skb;
+ struct sw_flow_key *key = &da->pkt_key;
+ const struct nlattr *actions = da->actions;
+
+ if (actions)
+ do_execute_actions(dp, skb, key, actions,
+ nla_len(actions));
+ else
+ ovs_dp_process_packet(skb, key);
+ } while (!action_fifo_is_empty(fifo));
+
+ /* Reset FIFO for the next packet. */
+ action_fifo_init(fifo);
+}
+
/* Execute a list of actions against 'skb'. */
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key)
{
- struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+ int level = this_cpu_read(exec_actions_level);
+ struct sw_flow_actions *acts;
+ int err;
+
+ acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+
+ this_cpu_inc(exec_actions_level);
+ OVS_CB(skb)->egress_tun_info = NULL;
+ err = do_execute_actions(dp, skb, key,
+ acts->actions, acts->actions_len);
+
+ if (!level)
+ process_deferred_actions(dp);
- OVS_CB(skb)->tun_key = NULL;
- return do_execute_actions(dp, skb, acts->actions,
- acts->actions_len, false);
+ this_cpu_dec(exec_actions_level);
+ return err;
+}
+
+int action_fifos_init(void)
+{
+ action_fifos = alloc_percpu(struct action_fifo);
+ if (!action_fifos)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void action_fifos_exit(void)
+{
+ free_percpu(action_fifos);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9db4bf6740d1..2e31d9e7f4dc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -47,8 +47,6 @@
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
-#include <linux/genetlink.h>
-#include <net/genetlink.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -66,25 +64,26 @@ static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;
-static struct genl_multicast_group ovs_dp_flow_multicast_group = {
- .name = OVS_FLOW_MCGROUP
+static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
+ .name = OVS_FLOW_MCGROUP,
};
-static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
- .name = OVS_DATAPATH_MCGROUP
+static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+ .name = OVS_DATAPATH_MCGROUP,
};
-struct genl_multicast_group ovs_dp_vport_multicast_group = {
- .name = OVS_VPORT_MCGROUP
+static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
+ .name = OVS_VPORT_MCGROUP,
};
/* Check if need to build a reply message.
* OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
-static bool ovs_must_notify(struct genl_info *info,
- const struct genl_multicast_group *grp)
+static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
+ unsigned int group)
{
return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
- netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
+ genl_has_listeners(family, genl_info_net(info)->genl_sock,
+ group);
}
static void ovs_notify(struct genl_family *family,
@@ -158,7 +157,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
}
/* Must be called with rcu_read_lock or ovs_mutex. */
-static const char *ovs_dp_name(const struct datapath *dp)
+const char *ovs_dp_name(const struct datapath *dp)
{
struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
return vport->ops->get_name(vport);
@@ -239,45 +238,40 @@ void ovs_dp_detach_port(struct vport *p)
}
/* Must be called with rcu_read_lock. */
-void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
+ const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
struct dp_stats_percpu *stats;
- struct sw_flow_key key;
u64 *stats_counter;
u32 n_mask_hit;
- int error;
stats = this_cpu_ptr(dp->stats_percpu);
- /* Extract flow from 'skb' into 'key'. */
- error = ovs_flow_extract(skb, p->port_no, &key);
- if (unlikely(error)) {
- kfree_skb(skb);
- return;
- }
-
/* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
+ int error;
upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.key = &key;
+ upcall.key = key;
upcall.userdata = NULL;
- upcall.portid = p->upcall_portid;
- ovs_dp_upcall(dp, skb, &upcall);
- consume_skb(skb);
+ upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+ error = ovs_dp_upcall(dp, skb, &upcall);
+ if (unlikely(error))
+ kfree_skb(skb);
+ else
+ consume_skb(skb);
stats_counter = &stats->n_missed;
goto out;
}
OVS_CB(skb)->flow = flow;
- OVS_CB(skb)->pkt_key = &key;
- ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
- ovs_execute_actions(dp, skb);
+ ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb);
+ ovs_execute_actions(dp, skb, key);
stats_counter = &stats->n_hit;
out:
@@ -375,6 +369,8 @@ static size_t key_attr_size(void)
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
@@ -406,7 +402,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
{
struct ovs_header *upcall;
struct sk_buff *nskb = NULL;
- struct sk_buff *user_skb; /* to be queued to userspace */
+ struct sk_buff *user_skb = NULL; /* to be queued to userspace */
struct nlattr *nla;
struct genl_info info = {
.dst_sk = ovs_dp_get_net(dp)->genl_sock,
@@ -464,7 +460,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+ err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+ BUG_ON(err);
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
@@ -495,9 +492,11 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
+ user_skb = NULL;
out:
if (err)
skb_tx_error(skb);
+ kfree_skb(user_skb);
kfree_skb(nskb);
return err;
}
@@ -511,6 +510,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct datapath *dp;
struct ethhdr *eth;
+ struct vport *input_vport;
int len;
int err;
@@ -545,13 +545,11 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(flow))
goto err_kfree_skb;
- err = ovs_flow_extract(packet, -1, &flow->key);
+ err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
+ &flow->key);
if (err)
goto err_flow_free;
- err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
- if (err)
- goto err_flow_free;
acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
err = PTR_ERR(acts);
if (IS_ERR(acts))
@@ -559,12 +557,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, 0, &acts);
- rcu_assign_pointer(flow->sf_acts, acts);
if (err)
goto err_flow_free;
+ rcu_assign_pointer(flow->sf_acts, acts);
+
+ OVS_CB(packet)->egress_tun_info = NULL;
OVS_CB(packet)->flow = flow;
- OVS_CB(packet)->pkt_key = &flow->key;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
@@ -574,8 +573,17 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (!dp)
goto err_unlock;
+ input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
+ if (!input_vport)
+ input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
+
+ if (!input_vport)
+ goto err_unlock;
+
+ OVS_CB(packet)->input_vport = input_vport;
+
local_bh_disable();
- err = ovs_execute_actions(dp, packet);
+ err = ovs_execute_actions(dp, packet, &flow->key);
local_bh_enable();
rcu_read_unlock();
@@ -759,7 +767,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
{
struct sk_buff *skb;
- if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
+ if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
return NULL;
skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
@@ -780,7 +788,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
always);
- if (!skb || IS_ERR(skb))
+ if (IS_ERR_OR_NULL(skb))
return skb;
retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
@@ -928,11 +936,34 @@ error:
return error;
}
+static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
+ const struct sw_flow_key *key,
+ const struct sw_flow_mask *mask)
+{
+ struct sw_flow_actions *acts;
+ struct sw_flow_key masked_key;
+ int error;
+
+ acts = ovs_nla_alloc_flow_actions(nla_len(a));
+ if (IS_ERR(acts))
+ return acts;
+
+ ovs_flow_mask_key(&masked_key, key, mask);
+ error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+ kfree(acts);
+ return ERR_PTR(error);
+ }
+
+ return acts;
+}
+
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow_key key, masked_key;
+ struct sw_flow_key key;
struct sw_flow *flow;
struct sw_flow_mask mask;
struct sk_buff *reply = NULL;
@@ -954,17 +985,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
- error = PTR_ERR(acts);
- if (IS_ERR(acts))
+ acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask);
+ if (IS_ERR(acts)) {
+ error = PTR_ERR(acts);
goto error;
-
- ovs_flow_mask_key(&masked_key, &key, &mask);
- error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
- &masked_key, 0, &acts);
- if (error) {
- OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
- goto err_kfree_acts;
}
}
@@ -1189,7 +1213,7 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};
-static struct genl_ops dp_flow_genl_ops[] = {
+static const struct genl_ops dp_flow_genl_ops[] = {
{ .cmd = OVS_FLOW_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
@@ -1373,7 +1397,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.options = NULL;
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
- parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+ parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
ovs_dp_change(dp, a);
@@ -1577,7 +1601,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
};
-static struct genl_ops dp_datapath_genl_ops[] = {
+static const struct genl_ops dp_datapath_genl_ops[] = {
{ .cmd = OVS_DP_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
@@ -1632,8 +1656,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
- nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
- nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
+ nla_put_string(skb, OVS_VPORT_ATTR_NAME,
+ vport->ops->get_name(vport)))
goto nla_put_failure;
ovs_vport_get_stats(vport, &vport_stats);
@@ -1641,6 +1665,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
&vport_stats))
goto nla_put_failure;
+ if (ovs_vport_get_upcall_portids(vport, skb))
+ goto nla_put_failure;
+
err = ovs_vport_get_options(vport, skb);
if (err == -EMSGSIZE)
goto error;
@@ -1762,7 +1789,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.options = a[OVS_VPORT_ATTR_OPTIONS];
parms.dp = dp;
parms.port_no = port_no;
- parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+ parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
vport = new_vport(&parms);
err = PTR_ERR(vport);
@@ -1812,8 +1839,14 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
}
- if (a[OVS_VPORT_ATTR_UPCALL_PID])
- vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+ if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
+ struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
+
+ err = ovs_vport_set_upcall_portids(vport, ids);
+ if (err)
+ goto exit_unlock_free;
+ }
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
@@ -1944,7 +1977,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};
-static struct genl_ops dp_vport_genl_ops[] = {
+static const struct genl_ops dp_vport_genl_ops[] = {
{ .cmd = OVS_VPORT_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = vport_policy,
@@ -2053,10 +2086,18 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
- err = ovs_flow_init();
+ err = action_fifos_init();
if (err)
goto error;
+ err = ovs_internal_dev_rtnl_link_register();
+ if (err)
+ goto error_action_fifos_exit;
+
+ err = ovs_flow_init();
+ if (err)
+ goto error_unreg_rtnl_link;
+
err = ovs_vport_init();
if (err)
goto error_flow_exit;
@@ -2083,6 +2124,10 @@ error_vport_exit:
ovs_vport_exit();
error_flow_exit:
ovs_flow_exit();
+error_unreg_rtnl_link:
+ ovs_internal_dev_rtnl_link_unregister();
+error_action_fifos_exit:
+ action_fifos_exit();
error:
return err;
}
@@ -2095,6 +2140,8 @@ static void dp_cleanup(void)
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
+ ovs_internal_dev_rtnl_link_unregister();
+ action_fifos_exit();
}
module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 7ede507500d7..974135439c5c 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -95,14 +95,15 @@ struct datapath {
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
- * @pkt_key: The flow information extracted from the packet. Must be nonnull.
- * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
- * packet is not being tunneled.
+ * @egress_tun_key: Tunnel information about this packet on egress path.
+ * NULL if the packet is not being tunneled.
+ * @input_vport: The original vport packet came in on. This value is cached
+ * when a packet is received by OVS.
*/
struct ovs_skb_cb {
struct sw_flow *flow;
- struct sw_flow_key *pkt_key;
- struct ovs_key_ipv4_tunnel *tun_key;
+ struct ovs_tunnel_info *egress_tun_info;
+ struct vport *input_vport;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -144,7 +145,7 @@ int lockdep_ovsl_is_held(void);
#define lockdep_ovsl_is_held() 1
#endif
-#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
+#define ASSERT_OVSL() WARN_ON(!lockdep_ovsl_is_held())
#define ovsl_dereference(p) \
rcu_dereference_protected(p, lockdep_ovsl_is_held())
#define rcu_dereference_ovsl(p) \
@@ -183,17 +184,23 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
-void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
const struct dp_upcall_info *);
+const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *);
+
void ovs_dp_notify_wq(struct work_struct *work);
+int action_fifos_init(void);
+void action_fifos_exit(void);
+
#define OVS_NLERR(fmt, ...) \
do { \
if (net_ratelimit()) \
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index d07ab538fc9d..62db02ba36bc 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -16,8 +16,6 @@
* 02110-1301, USA
*/
-#include "flow.h"
-#include "datapath.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -46,6 +44,10 @@
#include <net/ipv6.h>
#include <net/ndisc.h>
+#include "datapath.h"
+#include "flow.h"
+#include "flow_netlink.h"
+
u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
struct timespec cur_ts;
@@ -89,7 +91,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
* allocated stats as we have already locked them.
*/
if (likely(flow->stats_last_writer != NUMA_NO_NODE)
- && likely(!rcu_dereference(flow->stats[node]))) {
+ && likely(!rcu_access_pointer(flow->stats[node]))) {
/* Try to allocate node-specific stats. */
struct flow_stats *new_stats;
@@ -420,10 +422,9 @@ invalid:
}
/**
- * ovs_flow_extract - extracts a flow key from an Ethernet frame.
+ * key_extract - extracts a flow key from an Ethernet frame.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
* Ethernet header
- * @in_port: port number on which @skb was received.
* @key: output flow key
*
* The caller must ensure that skb->len >= ETH_HLEN.
@@ -442,18 +443,13 @@ invalid:
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
*/
-int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
+static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
int error;
struct ethhdr *eth;
- memset(key, 0, sizeof(*key));
-
- key->phy.priority = skb->priority;
- if (OVS_CB(skb)->tun_key)
- memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
- key->phy.in_port = in_port;
- key->phy.skb_mark = skb->mark;
+ /* Flags are always used as part of stats */
+ key->tp.flags = 0;
skb_reset_mac_header(skb);
@@ -469,6 +465,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
* update skb->csum here.
*/
+ key->eth.tci = 0;
if (vlan_tx_tag_present(skb))
key->eth.tci = htons(skb->vlan_tci);
else if (eth->h_proto == htons(ETH_P_8021Q))
@@ -489,6 +486,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
error = check_iphdr(skb);
if (unlikely(error)) {
+ memset(&key->ip, 0, sizeof(key->ip));
+ memset(&key->ipv4, 0, sizeof(key->ipv4));
if (error == -EINVAL) {
skb->transport_header = skb->network_header;
error = 0;
@@ -510,8 +509,10 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
return 0;
}
if (nh->frag_off & htons(IP_MF) ||
- skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
+ else
+ key->ip.frag = OVS_FRAG_TYPE_NONE;
/* Transport layer. */
if (key->ip.proto == IPPROTO_TCP) {
@@ -520,18 +521,25 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
+
} else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
@@ -541,33 +549,44 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
* them in 16-bit network byte order. */
key->tp.src = htons(icmp->type);
key->tp.dst = htons(icmp->code);
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
}
- } else if ((key->eth.type == htons(ETH_P_ARP) ||
- key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
+ } else if (key->eth.type == htons(ETH_P_ARP) ||
+ key->eth.type == htons(ETH_P_RARP)) {
struct arp_eth_header *arp;
arp = (struct arp_eth_header *)skb_network_header(skb);
- if (arp->ar_hrd == htons(ARPHRD_ETHER)
- && arp->ar_pro == htons(ETH_P_IP)
- && arp->ar_hln == ETH_ALEN
- && arp->ar_pln == 4) {
+ if (arphdr_ok(skb) &&
+ arp->ar_hrd == htons(ARPHRD_ETHER) &&
+ arp->ar_pro == htons(ETH_P_IP) &&
+ arp->ar_hln == ETH_ALEN &&
+ arp->ar_pln == 4) {
/* We only match on the lower 8 bits of the opcode. */
if (ntohs(arp->ar_op) <= 0xff)
key->ip.proto = ntohs(arp->ar_op);
+ else
+ key->ip.proto = 0;
+
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
+ } else {
+ memset(&key->ip, 0, sizeof(key->ip));
+ memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
nh_len = parse_ipv6hdr(skb, key);
if (unlikely(nh_len < 0)) {
+ memset(&key->ip, 0, sizeof(key->ip));
+ memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
if (nh_len == -EINVAL) {
skb->transport_header = skb->network_header;
error = 0;
@@ -589,27 +608,87 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
} else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
error = parse_icmpv6(skb, key, nh_len);
if (error)
return error;
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
}
}
}
-
return 0;
}
+
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ return key_extract(skb, key);
+}
+
+int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
+ struct sk_buff *skb, struct sw_flow_key *key)
+{
+ /* Extract metadata from packet. */
+ if (tun_info) {
+ memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
+
+ if (tun_info->options) {
+ BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
+ 8)) - 1
+ > sizeof(key->tun_opts));
+ memcpy(GENEVE_OPTS(key, tun_info->options_len),
+ tun_info->options, tun_info->options_len);
+ key->tun_opts_len = tun_info->options_len;
+ } else {
+ key->tun_opts_len = 0;
+ }
+ } else {
+ key->tun_opts_len = 0;
+ memset(&key->tun_key, 0, sizeof(key->tun_key));
+ }
+
+ key->phy.priority = skb->priority;
+ key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
+ key->phy.skb_mark = skb->mark;
+ key->ovs_flow_hash = 0;
+ key->recirc_id = 0;
+
+ /* Flags are always used as part of stats */
+ key->tp.flags = 0;
+
+ return key_extract(skb, key);
+}
+
+int ovs_flow_key_extract_userspace(const struct nlattr *attr,
+ struct sk_buff *skb,
+ struct sw_flow_key *key)
+{
+ int err;
+
+ /* Extract metadata from netlink attributes. */
+ err = ovs_nla_get_flow_metadata(attr, key);
+ if (err)
+ return err;
+
+ return key_extract(skb, key);
+}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 5e5aaed3a85b..71813318c8c7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -49,29 +49,53 @@ struct ovs_key_ipv4_tunnel {
u8 ipv4_ttl;
} __packed __aligned(4); /* Minimize padding. */
-static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
- const struct iphdr *iph, __be64 tun_id,
- __be16 tun_flags)
+struct ovs_tunnel_info {
+ struct ovs_key_ipv4_tunnel tunnel;
+ struct geneve_opt *options;
+ u8 options_len;
+};
+
+/* Store options at the end of the array if they are less than the
+ * maximum size. This allows us to get the benefits of variable length
+ * matching for small options.
+ */
+#define GENEVE_OPTS(flow_key, opt_len) \
+ ((struct geneve_opt *)((flow_key)->tun_opts + \
+ FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
+ opt_len))
+
+static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+ const struct iphdr *iph,
+ __be64 tun_id, __be16 tun_flags,
+ struct geneve_opt *opts,
+ u8 opts_len)
{
- tun_key->tun_id = tun_id;
- tun_key->ipv4_src = iph->saddr;
- tun_key->ipv4_dst = iph->daddr;
- tun_key->ipv4_tos = iph->tos;
- tun_key->ipv4_ttl = iph->ttl;
- tun_key->tun_flags = tun_flags;
+ tun_info->tunnel.tun_id = tun_id;
+ tun_info->tunnel.ipv4_src = iph->saddr;
+ tun_info->tunnel.ipv4_dst = iph->daddr;
+ tun_info->tunnel.ipv4_tos = iph->tos;
+ tun_info->tunnel.ipv4_ttl = iph->ttl;
+ tun_info->tunnel.tun_flags = tun_flags;
/* clear struct padding. */
- memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0,
- sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
+ memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
+ sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
+
+ tun_info->options = opts;
+ tun_info->options_len = opts_len;
}
struct sw_flow_key {
+ u8 tun_opts[255];
+ u8 tun_opts_len;
struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
struct {
u32 priority; /* Packet QoS priority. */
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} __packed phy; /* Safe when right after 'tun_key'. */
+ u32 ovs_flow_hash; /* Datapath computed hash value. */
+ u32 recirc_id; /* Recirculation ID. */
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
@@ -187,6 +211,12 @@ void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
-int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb,
+ struct sw_flow_key *key);
+/* Extract key from packet coming from userspace. */
+int ovs_flow_key_extract_userspace(const struct nlattr *attr,
+ struct sk_buff *skb,
+ struct sw_flow_key *key);
#endif /* flow.h */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d757848da89c..368f23307911 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -42,6 +42,7 @@
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
+#include <net/geneve.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
@@ -88,18 +89,20 @@ static void update_range__(struct sw_flow_match *match,
} \
} while (0)
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
- do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- len, is_mask); \
- if (is_mask) { \
- if ((match)->mask) \
- memcpy(&(match)->mask->key.field, value_p, len);\
- } else { \
- memcpy(&(match)->key->field, value_p, len); \
- } \
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
+ do { \
+ update_range__(match, offset, len, is_mask); \
+ if (is_mask) \
+ memcpy((u8 *)&(match)->mask->key + offset, value_p, \
+ len); \
+ else \
+ memcpy((u8 *)(match)->key + offset, value_p, len); \
} while (0)
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+ value_p, len, is_mask)
+
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
return range->end - range->start;
@@ -251,6 +254,8 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+ [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
+ [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
[OVS_KEY_ATTR_TUNNEL] = -1,
};
@@ -333,6 +338,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
int rem;
bool ttl = false;
__be16 tun_flags = 0;
+ unsigned long opt_key_offset;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
@@ -344,6 +350,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -352,7 +360,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
return -EINVAL;
}
- if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+ if (ovs_tunnel_key_lens[type] != nla_len(a) &&
+ ovs_tunnel_key_lens[type] != -1) {
OVS_NLERR("IPv4 tunnel attribute type has unexpected "
" length (type=%d, length=%d, expected=%d).\n",
type, nla_len(a), ovs_tunnel_key_lens[type]);
@@ -388,7 +397,63 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_CSUM:
tun_flags |= TUNNEL_CSUM;
break;
+ case OVS_TUNNEL_KEY_ATTR_OAM:
+ tun_flags |= TUNNEL_OAM;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+ tun_flags |= TUNNEL_OPTIONS_PRESENT;
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n",
+ nla_len(a),
+ sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (nla_len(a) % 4 != 0) {
+ OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
+ nla_len(a));
+ return -EINVAL;
+ }
+
+ /* We need to record the length of the options passed
+ * down, otherwise packets with the same format but
+ * additional options will be silently matched.
+ */
+ if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+ false);
+ } else {
+ /* This is somewhat unusual because it looks at
+ * both the key and mask while parsing the
+ * attributes (and by extension assumes the key
+ * is parsed first). Normally, we would verify
+ * that each is the correct length and that the
+ * attributes line up in the validate function.
+ * However, that is difficult because this is
+ * variable length and we won't have the
+ * information later.
+ */
+ if (match->key->tun_opts_len != nla_len(a)) {
+ OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
+ match->key->tun_opts_len,
+ nla_len(a));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
+ true);
+ }
+
+ opt_key_offset = (unsigned long)GENEVE_OPTS(
+ (struct sw_flow_key *)0,
+ nla_len(a));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
+ nla_data(a), nla_len(a),
+ is_mask);
+ break;
default:
+ OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n",
+ type);
return -EINVAL;
}
}
@@ -415,45 +480,80 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
return 0;
}
-static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key,
- const struct ovs_key_ipv4_tunnel *output)
+static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
+ const struct ovs_key_ipv4_tunnel *output,
+ const struct geneve_opt *tun_opts,
+ int swkey_tun_opts_len)
{
- struct nlattr *nla;
-
- nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
- if (!nla)
- return -EMSGSIZE;
-
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
if (output->ipv4_src &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
return -EMSGSIZE;
if (output->ipv4_dst &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
return -EMSGSIZE;
if (output->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
return -EMSGSIZE;
if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
- nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+ nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_CSUM) &&
- nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+ nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+ return -EMSGSIZE;
+ if ((output->tun_flags & TUNNEL_OAM) &&
+ nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
+ return -EMSGSIZE;
+ if (tun_opts &&
+ nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+ swkey_tun_opts_len, tun_opts))
return -EMSGSIZE;
- nla_nest_end(skb, nla);
return 0;
}
+static int ipv4_tun_to_nlattr(struct sk_buff *skb,
+ const struct ovs_key_ipv4_tunnel *output,
+ const struct geneve_opt *tun_opts,
+ int swkey_tun_opts_len)
+{
+ struct nlattr *nla;
+ int err;
+
+ nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
+ if (!nla)
+ return -EMSGSIZE;
+
+ err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
+ if (err)
+ return err;
+
+ nla_nest_end(skb, nla);
+ return 0;
+}
+
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
const struct nlattr **a, bool is_mask)
{
+ if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
+ u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
+
+ SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
+ }
+
+ if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
+ u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
+
+ SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
+ }
+
if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
SW_FLOW_KEY_PUT(match, phy.priority,
nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
@@ -836,7 +936,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
/**
* ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
- * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
+ * @key: Receives extracted in_port, priority, tun_key and skb_mark.
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence.
*
@@ -846,32 +946,24 @@ int ovs_nla_get_match(struct sw_flow_match *match,
* extracted from the packet itself.
*/
-int ovs_nla_get_flow_metadata(struct sw_flow *flow,
- const struct nlattr *attr)
+int ovs_nla_get_flow_metadata(const struct nlattr *attr,
+ struct sw_flow_key *key)
{
- struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ struct sw_flow_match match;
u64 attrs = 0;
int err;
- struct sw_flow_match match;
-
- flow->key.phy.in_port = DP_MAX_PORTS;
- flow->key.phy.priority = 0;
- flow->key.phy.skb_mark = 0;
- memset(tun_key, 0, sizeof(flow->key.tun_key));
err = parse_flow_nlattrs(attr, a, &attrs);
if (err)
return -EINVAL;
memset(&match, 0, sizeof(match));
- match.key = &flow->key;
+ match.key = key;
- err = metadata_from_nlattrs(&match, &attrs, a, false);
- if (err)
- return err;
+ key->phy.in_port = DP_MAX_PORTS;
- return 0;
+ return metadata_from_nlattrs(&match, &attrs, a, false);
}
int ovs_nla_put_flow(const struct sw_flow_key *swkey,
@@ -881,13 +973,26 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
struct nlattr *nla, *encap;
bool is_mask = (swkey != output);
- if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask) &&
- ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
+ if ((swkey->tun_key.ipv4_dst || is_mask)) {
+ const struct geneve_opt *opts = NULL;
+
+ if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
+ opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+
+ if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
+ swkey->tun_opts_len))
+ goto nla_put_failure;
+ }
+
if (swkey->phy.in_port == DP_MAX_PORTS) {
if (is_mask && (output->phy.in_port == 0xffff))
if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
@@ -1127,13 +1232,14 @@ out:
return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+static struct nlattr *__add_action(struct sw_flow_actions **sfa,
+ int attrtype, void *data, int len)
{
struct nlattr *a;
a = reserve_sfa_size(sfa, nla_attr_size(len));
if (IS_ERR(a))
- return PTR_ERR(a);
+ return a;
a->nla_type = attrtype;
a->nla_len = nla_attr_size(len);
@@ -1142,6 +1248,18 @@ static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, in
memcpy(nla_data(a), data, len);
memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
+ return a;
+}
+
+static int add_action(struct sw_flow_actions **sfa, int attrtype,
+ void *data, int len)
+{
+ struct nlattr *a;
+
+ a = __add_action(sfa, attrtype, data, len);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
return 0;
}
@@ -1247,6 +1365,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
{
struct sw_flow_match match;
struct sw_flow_key key;
+ struct ovs_tunnel_info *tun_info;
+ struct nlattr *a;
int err, start;
ovs_match_init(&match, &key, NULL);
@@ -1254,12 +1374,56 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (err)
return err;
+ if (key.tun_opts_len) {
+ struct geneve_opt *option = GENEVE_OPTS(&key,
+ key.tun_opts_len);
+ int opts_len = key.tun_opts_len;
+ bool crit_opt = false;
+
+ while (opts_len > 0) {
+ int len;
+
+ if (opts_len < sizeof(*option))
+ return -EINVAL;
+
+ len = sizeof(*option) + option->length * 4;
+ if (len > opts_len)
+ return -EINVAL;
+
+ crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+ option = (struct geneve_opt *)((u8 *)option + len);
+ opts_len -= len;
+ };
+
+ key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ };
+
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
if (start < 0)
return start;
- err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
- sizeof(match.key->tun_key));
+ a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
+ sizeof(*tun_info) + key.tun_opts_len);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
+ tun_info = nla_data(a);
+ tun_info->tunnel = key.tun_key;
+ tun_info->options_len = key.tun_opts_len;
+
+ if (tun_info->options_len) {
+ /* We need to store the options in the action itself since
+ * everything else will go away after flow setup. We can append
+ * it to tun_info and then point there.
+ */
+ memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
+ key.tun_opts_len);
+ tun_info->options = (struct geneve_opt *)(tun_info + 1);
+ } else {
+ tun_info->options = NULL;
+ }
+
add_nested_action_end(*sfa, start);
return err;
@@ -1409,11 +1573,13 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+ [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
- [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+ [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
+ [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -1440,6 +1606,18 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL;
break;
+ case OVS_ACTION_ATTR_HASH: {
+ const struct ovs_action_hash *act_hash = nla_data(a);
+
+ switch (act_hash->hash_alg) {
+ case OVS_HASH_ALG_L4:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ break;
+ }
case OVS_ACTION_ATTR_POP_VLAN:
break;
@@ -1452,6 +1630,9 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL;
break;
+ case OVS_ACTION_ATTR_RECIRC:
+ break;
+
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa, &skip_copy);
if (err)
@@ -1525,17 +1706,22 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
int err;
switch (key_type) {
- case OVS_KEY_ATTR_IPV4_TUNNEL:
+ case OVS_KEY_ATTR_TUNNEL_INFO: {
+ struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
+
start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
if (!start)
return -EMSGSIZE;
- err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
- nla_data(ovs_key));
+ err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
+ tun_info->options_len ?
+ tun_info->options : NULL,
+ tun_info->options_len);
if (err)
return err;
nla_nest_end(skb, start);
break;
+ }
default:
if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
return -EMSGSIZE;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 440151045d39..206e45add888 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -42,8 +42,8 @@ void ovs_match_init(struct sw_flow_match *match,
int ovs_nla_put_flow(const struct sw_flow_key *,
const struct sw_flow_key *, struct sk_buff *);
-int ovs_nla_get_flow_metadata(struct sw_flow *flow,
- const struct nlattr *attr);
+int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *);
+
int ovs_nla_get_match(struct sw_flow_match *match,
const struct nlattr *,
const struct nlattr *);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
new file mode 100644
index 000000000000..910b3ef2c0d5
--- /dev/null
+++ b/net/openvswitch/vport-geneve.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/version.h>
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/rculist.h>
+#include <linux/udp.h>
+#include <linux/if_vlan.h>
+
+#include <net/geneve.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+/**
+ * struct geneve_port - Keeps track of open UDP ports
+ * @sock: The socket created for this port number.
+ * @name: vport name.
+ */
+struct geneve_port {
+ struct geneve_sock *gs;
+ char name[IFNAMSIZ];
+};
+
+static LIST_HEAD(geneve_ports);
+
+static inline struct geneve_port *geneve_vport(const struct vport *vport)
+{
+ return vport_priv(vport);
+}
+
+static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
+{
+ return (struct genevehdr *)(udp_hdr(skb) + 1);
+}
+
+/* Convert 64 bit tunnel ID to 24 bit VNI. */
+static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+ vni[0] = (__force __u8)(tun_id >> 16);
+ vni[1] = (__force __u8)(tun_id >> 8);
+ vni[2] = (__force __u8)tun_id;
+#else
+ vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+ vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+ vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
+
+/* Convert 24 bit VNI to 64 bit tunnel ID. */
+static __be64 vni_to_tunnel_id(__u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+ return (vni[0] << 16) | (vni[1] << 8) | vni[2];
+#else
+ return (__force __be64)(((__force u64)vni[0] << 40) |
+ ((__force u64)vni[1] << 48) |
+ ((__force u64)vni[2] << 56));
+#endif
+}
+
+static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
+{
+ struct vport *vport = gs->rcv_data;
+ struct genevehdr *geneveh = geneve_hdr(skb);
+ int opts_len;
+ struct ovs_tunnel_info tun_info;
+ __be64 key;
+ __be16 flags;
+
+ opts_len = geneveh->opt_len * 4;
+
+ flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
+ (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
+ (geneveh->oam ? TUNNEL_OAM : 0) |
+ (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
+
+ key = vni_to_tunnel_id(geneveh->vni);
+
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
+ geneveh->options, opts_len);
+
+ ovs_vport_receive(vport, skb, &tun_info);
+}
+
+static int geneve_get_options(const struct vport *vport,
+ struct sk_buff *skb)
+{
+ struct geneve_port *geneve_port = geneve_vport(vport);
+ struct inet_sock *sk = inet_sk(geneve_port->gs->sock->sk);
+
+ if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport)))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static void geneve_tnl_destroy(struct vport *vport)
+{
+ struct geneve_port *geneve_port = geneve_vport(vport);
+
+ geneve_sock_release(geneve_port->gs);
+
+ ovs_vport_deferred_free(vport);
+}
+
+static struct vport *geneve_tnl_create(const struct vport_parms *parms)
+{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct nlattr *options = parms->options;
+ struct geneve_port *geneve_port;
+ struct geneve_sock *gs;
+ struct vport *vport;
+ struct nlattr *a;
+ int err;
+ u16 dst_port;
+
+ if (!options) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+ if (a && nla_len(a) == sizeof(u16)) {
+ dst_port = nla_get_u16(a);
+ } else {
+ /* Require destination port from userspace. */
+ err = -EINVAL;
+ goto error;
+ }
+
+ vport = ovs_vport_alloc(sizeof(struct geneve_port),
+ &ovs_geneve_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ geneve_port = geneve_vport(vport);
+ strncpy(geneve_port->name, parms->name, IFNAMSIZ);
+
+ gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
+ if (IS_ERR(gs)) {
+ ovs_vport_free(vport);
+ return (void *)gs;
+ }
+ geneve_port->gs = gs;
+
+ return vport;
+error:
+ return ERR_PTR(err);
+}
+
+static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
+{
+ struct ovs_key_ipv4_tunnel *tun_key;
+ struct ovs_tunnel_info *tun_info;
+ struct net *net = ovs_dp_get_net(vport->dp);
+ struct geneve_port *geneve_port = geneve_vport(vport);
+ __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
+ __be16 sport;
+ struct rtable *rt;
+ struct flowi4 fl;
+ u8 vni[3];
+ __be16 df;
+ int err;
+
+ tun_info = OVS_CB(skb)->egress_tun_info;
+ if (unlikely(!tun_info)) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ tun_key = &tun_info->tunnel;
+
+ /* Route lookup */
+ memset(&fl, 0, sizeof(fl));
+ fl.daddr = tun_key->ipv4_dst;
+ fl.saddr = tun_key->ipv4_src;
+ fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
+ fl.flowi4_mark = skb->mark;
+ fl.flowi4_proto = IPPROTO_UDP;
+
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto error;
+ }
+
+ df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+ sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+ tunnel_id_to_vni(tun_key->tun_id, vni);
+ skb->ignore_df = 1;
+
+ err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
+ tun_key->ipv4_dst, tun_key->ipv4_tos,
+ tun_key->ipv4_ttl, df, sport, dport,
+ tun_key->tun_flags, vni,
+ tun_info->options_len, (u8 *)tun_info->options,
+ false);
+ if (err < 0)
+ ip_rt_put(rt);
+error:
+ return err;
+}
+
+static const char *geneve_get_name(const struct vport *vport)
+{
+ struct geneve_port *geneve_port = geneve_vport(vport);
+
+ return geneve_port->name;
+}
+
+const struct vport_ops ovs_geneve_vport_ops = {
+ .type = OVS_VPORT_TYPE_GENEVE,
+ .create = geneve_tnl_create,
+ .destroy = geneve_tnl_destroy,
+ .get_name = geneve_get_name,
+ .get_options = geneve_get_options,
+ .send = geneve_tnl_send,
+};
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index f49148a07da2..108b82da2fd9 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -63,8 +63,10 @@ static __be16 filter_tnl_flags(__be16 flags)
static struct sk_buff *__build_header(struct sk_buff *skb,
int tunnel_hlen)
{
- const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
struct tnl_ptk_info tpi;
+ const struct ovs_key_ipv4_tunnel *tun_key;
+
+ tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
if (IS_ERR(skb))
@@ -92,7 +94,7 @@ static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
static int gre_rcv(struct sk_buff *skb,
const struct tnl_ptk_info *tpi)
{
- struct ovs_key_ipv4_tunnel tun_key;
+ struct ovs_tunnel_info tun_info;
struct ovs_net *ovs_net;
struct vport *vport;
__be64 key;
@@ -103,10 +105,10 @@ static int gre_rcv(struct sk_buff *skb,
return PACKET_REJECT;
key = key_to_tunnel_id(tpi->key, tpi->seq);
- ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key,
- filter_tnl_flags(tpi->flags));
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
+ filter_tnl_flags(tpi->flags), NULL, 0);
- ovs_vport_receive(vport, skb, &tun_key);
+ ovs_vport_receive(vport, skb, &tun_info);
return PACKET_RCVD;
}
@@ -129,6 +131,7 @@ static int gre_err(struct sk_buff *skb, u32 info,
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
+ struct ovs_key_ipv4_tunnel *tun_key;
struct flowi4 fl;
struct rtable *rt;
int min_headroom;
@@ -136,16 +139,17 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
__be16 df;
int err;
- if (unlikely(!OVS_CB(skb)->tun_key)) {
+ if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
err = -EINVAL;
goto error;
}
+ tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
/* Route lookup */
memset(&fl, 0, sizeof(fl));
- fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
- fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
+ fl.daddr = tun_key->ipv4_dst;
+ fl.saddr = tun_key->ipv4_src;
+ fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
fl.flowi4_mark = skb->mark;
fl.flowi4_proto = IPPROTO_GRE;
@@ -153,7 +157,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
if (IS_ERR(rt))
return PTR_ERR(rt);
- tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
+ tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr)
@@ -185,15 +189,14 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
goto err_free_rt;
}
- df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+ df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
htons(IP_DF) : 0;
skb->ignore_df = 1;
return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
- OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
- OVS_CB(skb)->tun_key->ipv4_tos,
- OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
+ tun_key->ipv4_dst, IPPROTO_GRE,
+ tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
err_free_rt:
ip_rt_put(rt);
error:
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 789af9280e77..84516126e5f3 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -26,6 +26,7 @@
#include <net/dst.h>
#include <net/xfrm.h>
+#include <net/rtnetlink.h>
#include "datapath.h"
#include "vport-internal_dev.h"
@@ -121,6 +122,10 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_get_stats64 = internal_dev_get_stats,
};
+static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
+ .kind = "openvswitch",
+};
+
static void do_setup(struct net_device *netdev)
{
ether_setup(netdev);
@@ -131,14 +136,18 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netdev->destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
+ netdev->rtnl_link_ops = &internal_dev_link_ops;
netdev->tx_queue_len = 0;
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
+ NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
+ NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
netdev->vlan_features = netdev->features;
+ netdev->hw_enc_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+
eth_hw_addr_random(netdev);
}
@@ -159,7 +168,8 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
netdev_vport = netdev_vport_priv(vport);
netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
- parms->name, do_setup);
+ parms->name, NET_NAME_UNKNOWN,
+ do_setup);
if (!netdev_vport->dev) {
err = -ENOMEM;
goto error_free_vport;
@@ -248,3 +258,13 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
return internal_dev_priv(netdev)->vport;
}
+
+int ovs_internal_dev_rtnl_link_register(void)
+{
+ return rtnl_link_register(&internal_dev_link_ops);
+}
+
+void ovs_internal_dev_rtnl_link_unregister(void)
+{
+ rtnl_link_unregister(&internal_dev_link_ops);
+}
diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h
index 9a7d30ecc6a2..1b179a190cff 100644
--- a/net/openvswitch/vport-internal_dev.h
+++ b/net/openvswitch/vport-internal_dev.h
@@ -24,5 +24,7 @@
int ovs_is_internal_dev(const struct net_device *);
struct vport *ovs_internal_dev_get_vport(struct net_device *);
+int ovs_internal_dev_rtnl_link_register(void);
+void ovs_internal_dev_rtnl_link_unregister(void);
#endif /* vport-internal_dev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 0edbd95c60e7..2735e01dca73 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 Nicira, Inc.
+ * Copyright (c) 2014 Nicira, Inc.
* Copyright (c) 2013 Cisco Systems, Inc.
*
* This program is free software; you can redistribute it and/or
@@ -58,7 +58,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
/* Called with rcu_read_lock and BH disabled. */
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
{
- struct ovs_key_ipv4_tunnel tun_key;
+ struct ovs_tunnel_info tun_info;
struct vport *vport = vs->data;
struct iphdr *iph;
__be64 key;
@@ -66,9 +66,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8);
- ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+ ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
- ovs_vport_receive(vport, skb, &tun_key);
+ ovs_vport_receive(vport, skb, &tun_info);
}
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
@@ -140,24 +140,24 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ struct ovs_key_ipv4_tunnel *tun_key;
struct rtable *rt;
struct flowi4 fl;
__be16 src_port;
- int port_min;
- int port_max;
__be16 df;
int err;
- if (unlikely(!OVS_CB(skb)->tun_key)) {
+ if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
err = -EINVAL;
goto error;
}
+ tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
/* Route lookup */
memset(&fl, 0, sizeof(fl));
- fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
- fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
+ fl.daddr = tun_key->ipv4_dst;
+ fl.saddr = tun_key->ipv4_src;
+ fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
fl.flowi4_mark = skb->mark;
fl.flowi4_proto = IPPROTO_UDP;
@@ -167,20 +167,18 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
goto error;
}
- df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+ df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
htons(IP_DF) : 0;
skb->ignore_df = 1;
- inet_get_local_port_range(net, &port_min, &port_max);
- src_port = vxlan_src_port(port_min, port_max, skb);
+ src_port = udp_flow_src_port(net, skb, 0, 0, true);
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
- fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst,
- OVS_CB(skb)->tun_key->ipv4_tos,
- OVS_CB(skb)->tun_key->ipv4_ttl, df,
+ fl.saddr, tun_key->ipv4_dst,
+ tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
src_port, dst_port,
- htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+ htonl(be64_to_cpu(tun_key->tun_id) << 8),
false);
if (err < 0)
ip_rt_put(rt);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 42c0f4a0b78c..53001b020ca7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -48,6 +48,9 @@ static const struct vport_ops *vport_ops_list[] = {
#ifdef CONFIG_OPENVSWITCH_VXLAN
&ovs_vxlan_vport_ops,
#endif
+#ifdef CONFIG_OPENVSWITCH_GENEVE
+ &ovs_geneve_vport_ops,
+#endif
};
/* Protected by RCU read lock for reading, ovs_mutex for writing. */
@@ -134,18 +137,20 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->dp = parms->dp;
vport->port_no = parms->port_no;
- vport->upcall_portid = parms->upcall_portid;
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
+ if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
+ kfree(vport);
+ return ERR_PTR(-EINVAL);
+ }
+
vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!vport->percpu_stats) {
kfree(vport);
return ERR_PTR(-ENOMEM);
}
- spin_lock_init(&vport->stats_lock);
-
return vport;
}
@@ -161,6 +166,10 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
*/
void ovs_vport_free(struct vport *vport)
{
+ /* vport is freed from RCU callback or error path, Therefore
+ * it is safe to use raw dereference.
+ */
+ kfree(rcu_dereference_raw(vport->upcall_portids));
free_percpu(vport->percpu_stats);
kfree(vport);
}
@@ -260,14 +269,10 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
* netdev-stats can be directly read over netlink-ioctl.
*/
- spin_lock_bh(&vport->stats_lock);
-
- stats->rx_errors = vport->err_stats.rx_errors;
- stats->tx_errors = vport->err_stats.tx_errors;
- stats->tx_dropped = vport->err_stats.tx_dropped;
- stats->rx_dropped = vport->err_stats.rx_dropped;
-
- spin_unlock_bh(&vport->stats_lock);
+ stats->rx_errors = atomic_long_read(&vport->err_stats.rx_errors);
+ stats->tx_errors = atomic_long_read(&vport->err_stats.tx_errors);
+ stats->tx_dropped = atomic_long_read(&vport->err_stats.tx_dropped);
+ stats->rx_dropped = atomic_long_read(&vport->err_stats.rx_dropped);
for_each_possible_cpu(i) {
const struct pcpu_sw_netstats *percpu_stats;
@@ -327,6 +332,99 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
}
/**
+ * ovs_vport_set_upcall_portids - set upcall portids of @vport.
+ *
+ * @vport: vport to modify.
+ * @ids: new configuration, an array of port ids.
+ *
+ * Sets the vport's upcall_portids to @ids.
+ *
+ * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
+ * as an array of U32.
+ *
+ * Must be called with ovs_mutex.
+ */
+int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids)
+{
+ struct vport_portids *old, *vport_portids;
+
+ if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+ return -EINVAL;
+
+ old = ovsl_dereference(vport->upcall_portids);
+
+ vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
+ GFP_KERNEL);
+ if (!vport_portids)
+ return -ENOMEM;
+
+ vport_portids->n_ids = nla_len(ids) / sizeof(u32);
+ vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
+ nla_memcpy(vport_portids->ids, ids, nla_len(ids));
+
+ rcu_assign_pointer(vport->upcall_portids, vport_portids);
+
+ if (old)
+ kfree_rcu(old, rcu);
+ return 0;
+}
+
+/**
+ * ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
+ *
+ * @vport: vport from which to retrieve the portids.
+ * @skb: sk_buff where portids should be appended.
+ *
+ * Retrieves the configuration of the given vport, appending the
+ * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
+ * portids to @skb.
+ *
+ * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
+ * If an error occurs, @skb is left unmodified. Must be called with
+ * ovs_mutex or rcu_read_lock.
+ */
+int ovs_vport_get_upcall_portids(const struct vport *vport,
+ struct sk_buff *skb)
+{
+ struct vport_portids *ids;
+
+ ids = rcu_dereference_ovsl(vport->upcall_portids);
+
+ if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
+ return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
+ ids->n_ids * sizeof(u32), (void *)ids->ids);
+ else
+ return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
+}
+
+/**
+ * ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
+ *
+ * @vport: vport from which the missed packet is received.
+ * @skb: skb that the missed packet was received.
+ *
+ * Uses the skb_get_hash() to select the upcall portid to send the
+ * upcall.
+ *
+ * Returns the portid of the target socket. Must be called with rcu_read_lock.
+ */
+u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb)
+{
+ struct vport_portids *ids;
+ u32 ids_index;
+ u32 hash;
+
+ ids = rcu_dereference(p->upcall_portids);
+
+ if (ids->n_ids == 1 && ids->ids[0] == 0)
+ return 0;
+
+ hash = skb_get_hash(skb);
+ ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
+ return ids->ids[ids_index];
+}
+
+/**
* ovs_vport_receive - pass up received packet to the datapath for processing
*
* @vport: vport that received the packet
@@ -337,9 +435,11 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
* skb->data should point to the Ethernet header.
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
- struct ovs_key_ipv4_tunnel *tun_key)
+ struct ovs_tunnel_info *tun_info)
{
struct pcpu_sw_netstats *stats;
+ struct sw_flow_key key;
+ int error;
stats = this_cpu_ptr(vport->percpu_stats);
u64_stats_update_begin(&stats->syncp);
@@ -347,8 +447,15 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
stats->rx_bytes += skb->len;
u64_stats_update_end(&stats->syncp);
- OVS_CB(skb)->tun_key = tun_key;
- ovs_dp_process_received_packet(vport, skb);
+ OVS_CB(skb)->input_vport = vport;
+ OVS_CB(skb)->egress_tun_info = NULL;
+ /* Extract flow from 'skb' into 'key'. */
+ error = ovs_flow_key_extract(tun_info, skb, &key);
+ if (unlikely(error)) {
+ kfree_skb(skb);
+ return;
+ }
+ ovs_dp_process_packet(skb, &key);
}
/**
@@ -394,27 +501,24 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
static void ovs_vport_record_error(struct vport *vport,
enum vport_err_type err_type)
{
- spin_lock(&vport->stats_lock);
-
switch (err_type) {
case VPORT_E_RX_DROPPED:
- vport->err_stats.rx_dropped++;
+ atomic_long_inc(&vport->err_stats.rx_dropped);
break;
case VPORT_E_RX_ERROR:
- vport->err_stats.rx_errors++;
+ atomic_long_inc(&vport->err_stats.rx_errors);
break;
case VPORT_E_TX_DROPPED:
- vport->err_stats.tx_dropped++;
+ atomic_long_inc(&vport->err_stats.tx_dropped);
break;
case VPORT_E_TX_ERROR:
- vport->err_stats.tx_errors++;
+ atomic_long_inc(&vport->err_stats.tx_errors);
break;
}
- spin_unlock(&vport->stats_lock);
}
static void free_vport_rcu(struct rcu_head *rcu)
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 8d721e62f388..8942125de3a6 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -23,6 +23,7 @@
#include <linux/list.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
+#include <linux/reciprocal_div.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
@@ -34,7 +35,6 @@ struct vport_parms;
/* The following definitions are for users of the vport subsytem: */
-/* The following definitions are for users of the vport subsytem: */
struct vport_net {
struct vport __rcu *gre_vport;
};
@@ -52,35 +52,52 @@ void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);
+int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids);
+int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
+u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
+
int ovs_vport_send(struct vport *, struct sk_buff *);
/* The following definitions are for implementers of vport devices: */
struct vport_err_stats {
- u64 rx_dropped;
- u64 rx_errors;
- u64 tx_dropped;
- u64 tx_errors;
+ atomic_long_t rx_dropped;
+ atomic_long_t rx_errors;
+ atomic_long_t tx_dropped;
+ atomic_long_t tx_errors;
+};
+/**
+ * struct vport_portids - array of netlink portids of a vport.
+ * must be protected by rcu.
+ * @rn_ids: The reciprocal value of @n_ids.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_ids: Size of @ids array.
+ * @ids: Array storing the Netlink socket pids to be used for packets received
+ * on this port that miss the flow table.
+ */
+struct vport_portids {
+ struct reciprocal_value rn_ids;
+ struct rcu_head rcu;
+ u32 n_ids;
+ u32 ids[];
};
/**
* struct vport - one port within a datapath
* @rcu: RCU callback head for deferred destruction.
* @dp: Datapath to which this port belongs.
- * @upcall_portid: The Netlink port to use for packets received on this port that
- * miss the flow table.
+ * @upcall_portids: RCU protected 'struct vport_portids'.
* @port_no: Index into @dp's @ports array.
* @hash_node: Element in @dev_table hash table in vport.c.
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure.
* @percpu_stats: Points to per-CPU statistics used and maintained by vport
- * @stats_lock: Protects @err_stats;
* @err_stats: Points to error statistics used and maintained by vport
*/
struct vport {
struct rcu_head rcu;
struct datapath *dp;
- u32 upcall_portid;
+ struct vport_portids __rcu *upcall_portids;
u16 port_no;
struct hlist_node hash_node;
@@ -89,7 +106,6 @@ struct vport {
struct pcpu_sw_netstats __percpu *percpu_stats;
- spinlock_t stats_lock;
struct vport_err_stats err_stats;
};
@@ -111,7 +127,7 @@ struct vport_parms {
/* For ovs_vport_alloc(). */
struct datapath *dp;
u16 port_no;
- u32 upcall_portid;
+ struct nlattr *upcall_portids;
};
/**
@@ -191,7 +207,7 @@ static inline struct vport *vport_from_priv(void *priv)
}
void ovs_vport_receive(struct vport *, struct sk_buff *,
- struct ovs_key_ipv4_tunnel *);
+ struct ovs_tunnel_info *);
/* List of statically compiled vport implementations. Don't forget to also
* add yours to the list at the top of vport.c. */
@@ -199,6 +215,7 @@ extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops;
extern const struct vport_ops ovs_gre_vport_ops;
extern const struct vport_ops ovs_vxlan_vport_ops;
+extern const struct vport_ops ovs_geneve_vport_ops;
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b85c67ccb797..87d20f48ff06 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -240,11 +240,9 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
static int packet_direct_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- const struct net_device_ops *ops = dev->netdev_ops;
netdev_features_t features;
struct netdev_queue *txq;
int ret = NETDEV_TX_BUSY;
- u16 queue_map;
if (unlikely(!netif_running(dev) ||
!netif_carrier_ok(dev)))
@@ -255,17 +253,13 @@ static int packet_direct_xmit(struct sk_buff *skb)
__skb_linearize(skb))
goto drop;
- queue_map = skb_get_queue_mapping(skb);
- txq = netdev_get_tx_queue(dev, queue_map);
+ txq = skb_get_tx_queue(dev, skb);
local_bh_disable();
HARD_TX_LOCK(dev, txq, smp_processor_id());
- if (!netif_xmit_frozen_or_drv_stopped(txq)) {
- ret = ops->ndo_start_xmit(skb, dev);
- if (ret == NETDEV_TX_OK)
- txq_trans_update(txq);
- }
+ if (!netif_xmit_frozen_or_drv_stopped(txq))
+ ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
local_bh_enable();
@@ -441,14 +435,10 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
{
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
- if (shhwtstamps) {
- if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
- return TP_STATUS_TS_SYS_HARDWARE;
- if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
- return TP_STATUS_TS_RAW_HARDWARE;
- }
+ if (shhwtstamps &&
+ (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
+ return TP_STATUS_TS_RAW_HARDWARE;
if (ktime_to_timespec_cond(skb->tstamp, ts))
return TP_STATUS_TS_SOFTWARE;
@@ -636,6 +626,7 @@ static void init_prb_bdqc(struct packet_sock *po,
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+ p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
prb_setup_retire_blk_timer(po, tx_ring);
prb_open_block(p1, pbd);
@@ -1946,6 +1937,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if ((int)snaplen < 0)
snaplen = 0;
}
+ } else if (unlikely(macoff + snaplen >
+ GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
+ u32 nval;
+
+ nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
+ pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
+ snaplen, nval, macoff);
+ snaplen = nval;
+ if (unlikely((int)snaplen < 0)) {
+ snaplen = 0;
+ macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+ }
}
spin_lock(&sk->sk_receive_queue.lock);
h.raw = packet_current_rx_frame(po, skb,
@@ -3071,10 +3074,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
break;
case PACKET_MR_PROMISC:
return dev_set_promiscuity(dev, what);
- break;
case PACKET_MR_ALLMULTI:
return dev_set_allmulti(dev, what);
- break;
case PACKET_MR_UNICAST:
if (i->alen != dev->addr_len)
return -EINVAL;
@@ -3789,6 +3790,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
goto out;
if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
goto out;
+ if (po->tp_version >= TPACKET_V3 &&
+ (int)(req->tp_block_size -
+ BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+ goto out;
if (unlikely(req->tp_frame_size < po->tp_hdrlen +
po->tp_reserve))
goto out;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index eb9580a6b25f..cdddf6a30399 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -29,6 +29,7 @@ struct tpacket_kbdq_core {
char *pkblk_start;
char *pkblk_end;
int kblk_size;
+ unsigned int max_frame_len;
unsigned int knum_blocks;
uint64_t knxt_seq_num;
char *prev;
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 66dc65e7c6a1..e9a83a637185 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -267,7 +267,7 @@ int gprs_attach(struct sock *sk)
return -EINVAL; /* need packet boundaries */
/* Create net device */
- dev = alloc_netdev(sizeof(*gp), ifname, gprs_setup);
+ dev = alloc_netdev(sizeof(*gp), ifname, NET_NAME_UNKNOWN, gprs_setup);
if (!dev)
return -ENOMEM;
gp = netdev_priv(dev);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 56a6146ac94b..a58680016472 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -36,7 +36,7 @@
struct phonet_routes {
struct mutex lock;
- struct net_device *table[64];
+ struct net_device __rcu *table[64];
};
struct phonet_net {
@@ -275,7 +275,7 @@ static void phonet_route_autodel(struct net_device *dev)
bitmap_zero(deleted, 64);
mutex_lock(&pnn->routes.lock);
for (i = 0; i < 64; i++)
- if (dev == pnn->routes.table[i]) {
+ if (rcu_access_pointer(pnn->routes.table[i]) == dev) {
RCU_INIT_POINTER(pnn->routes.table[i], NULL);
set_bit(i, deleted);
}
@@ -388,7 +388,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
daddr = daddr >> 2;
mutex_lock(&routes->lock);
- if (dev == routes->table[daddr])
+ if (rcu_access_pointer(routes->table[daddr]) == dev)
RCU_INIT_POINTER(routes->table[daddr], NULL);
else
dev = NULL;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 424ff622ab5f..10443377fb9d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -83,7 +83,7 @@ static int rds_release(struct socket *sock)
/*
* the binding lookup hash uses rcu, we need to
- * make sure we sychronize_rcu before we free our
+ * make sure we synchronize_rcu before we free our
* entry
*/
rds_remove_bound(rs);
diff --git a/net/rds/send.c b/net/rds/send.c
index 23718160d71e..0a64541020b0 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -593,8 +593,11 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status)
sock_put(rds_rs_to_sk(rs));
}
rs = rm->m_rs;
- sock_hold(rds_rs_to_sk(rs));
+ if (rs)
+ sock_hold(rds_rs_to_sk(rs));
}
+ if (!rs)
+ goto unlock_and_drop;
spin_lock(&rs->rs_lock);
if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
@@ -638,9 +641,6 @@ unlock_and_drop:
* queue. This means that in the TCP case, the message may not have been
* assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked
* checks the RDS_MSG_HAS_ACK_SEQ bit.
- *
- * XXX It's not clear to me how this is safely serialized with socket
- * destruction. Maybe it should bail if it sees SOCK_DEAD.
*/
void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
is_acked_func is_acked)
@@ -711,6 +711,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
*/
if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
spin_unlock_irqrestore(&conn->c_lock, flags);
+ spin_lock_irqsave(&rm->m_rs_lock, flags);
+ rm->m_rs = NULL;
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags);
continue;
}
list_del_init(&rm->m_conn_item);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index a65ee78db0c5..f9f564a6c960 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -106,11 +106,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
rds_tcp_set_callbacks(sock, conn);
ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
O_NONBLOCK);
- sock = NULL;
rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
ret = 0;
+ if (ret == 0)
+ sock = NULL;
+ else
+ rds_tcp_restore_callbacks(sock, conn->c_transport_data);
out:
if (sock)
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 65eaefcab241..dc2402e871fd 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -78,8 +78,7 @@ void rds_connect_complete(struct rds_connection *conn)
"current state is %d\n",
__func__,
atomic_read(&conn->c_state));
- atomic_set(&conn->c_state, RDS_CONN_ERROR);
- queue_work(rds_wq, &conn->c_down_w);
+ rds_conn_drop(conn);
return;
}
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b3b16c070a7f..fa7cd792791c 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
/**
* __rfkill_switch_all - Toggle state of all switches of given type
* @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
*
* This function sets the state of all switches of given type,
* unless a specific switch is claimed by userspace (in which case,
@@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
/**
* rfkill_switch_all - Toggle state of all switches of given type
* @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
*
* Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
* Please refer to __rfkill_switch_all() for details.
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 14c98e48f261..0f62326c0f5e 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -54,7 +54,7 @@ static int rfkill_gpio_set_power(void *data, bool blocked)
if (blocked && !IS_ERR(rfkill->clk) && rfkill->clk_enabled)
clk_disable(rfkill->clk);
- rfkill->clk_enabled = blocked;
+ rfkill->clk_enabled = !blocked;
return 0;
}
@@ -158,10 +158,12 @@ static const struct acpi_device_id rfkill_acpi_match[] = {
{ "BCM2E1A", RFKILL_TYPE_BLUETOOTH },
{ "BCM2E39", RFKILL_TYPE_BLUETOOTH },
{ "BCM2E3D", RFKILL_TYPE_BLUETOOTH },
+ { "BCM2E64", RFKILL_TYPE_BLUETOOTH },
{ "BCM4752", RFKILL_TYPE_GPS },
{ "LNV4752", RFKILL_TYPE_GPS },
{ },
};
+MODULE_DEVICE_TABLE(acpi, rfkill_acpi_match);
#endif
static struct platform_driver rfkill_gpio_driver = {
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 8451c8cdc9de..a85c1a086ae4 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1538,7 +1538,7 @@ static int __init rose_proto_init(void)
char name[IFNAMSIZ];
sprintf(name, "rose%d", i);
- dev = alloc_netdev(0, name, rose_setup);
+ dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, rose_setup);
if (!dev) {
printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n");
rc = -ENOMEM;
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index bc5514211b0c..e873d7d9f857 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -160,7 +160,8 @@ void rose_link_rx_restart(struct sk_buff *skb, struct rose_neigh *neigh, unsigne
break;
case ROSE_DIAGNOSTIC:
- printk(KERN_WARNING "ROSE: received diagnostic #%d - %02X %02X %02X\n", skb->data[3], skb->data[4], skb->data[5], skb->data[6]);
+ pr_warn("ROSE: received diagnostic #%d - %3ph\n", skb->data[3],
+ skb->data + 4);
break;
default:
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index db57458c824c..74c0fcd36838 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -37,7 +37,7 @@ void rxrpc_UDP_error_report(struct sock *sk)
_enter("%p{%d}", sk, local->debug_id);
- skb = skb_dequeue(&sk->sk_error_queue);
+ skb = sock_dequeue_err_skb(sk);
if (!skb) {
_leave("UDP socket errqueue empty");
return;
@@ -111,18 +111,6 @@ void rxrpc_UDP_error_report(struct sock *sk)
skb_queue_tail(&trans->error_queue, skb);
rxrpc_queue_work(&trans->error_handler);
- /* reset and regenerate socket error */
- spin_lock_bh(&sk->sk_error_queue.lock);
- sk->sk_err = 0;
- skb = skb_peek(&sk->sk_error_queue);
- if (skb) {
- sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno;
- spin_unlock_bh(&sk->sk_error_queue.lock);
- sk->sk_error_report(sk);
- } else {
- spin_unlock_bh(&sk->sk_error_queue.lock);
- }
-
_leave("");
}
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 63b21e580de9..481f89f93789 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -45,7 +45,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp;
struct rxrpc_sock *rx = call->socket;
struct sock *sk;
- int skb_len, ret;
+ int ret;
_enter(",,%d,%d", force, terminal);
@@ -101,13 +101,6 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
rx->interceptor(sk, call->user_call_ID, skb);
spin_unlock_bh(&sk->sk_receive_queue.lock);
} else {
-
- /* Cache the SKB length before we tack it onto the
- * receive queue. Once it is added it no longer
- * belongs to us and may be freed by other threads of
- * control pulling packets from the queue */
- skb_len = skb->len;
-
_net("post skb %p", skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock_bh(&sk->sk_receive_queue.lock);
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 10c6cb694b43..db0f39f5ef96 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -348,7 +348,7 @@ static int rxrpc_krb5_decode_tagged_array(struct krb5_tagged_data **_td,
n_elem = ntohl(*xdr++);
toklen -= 4;
- if (n_elem < 0 || n_elem > max_n_elem)
+ if (n_elem > max_n_elem)
return -EINVAL;
*_n_elem = n_elem;
if (n_elem > 0) {
@@ -1141,7 +1141,7 @@ static long rxrpc_read(const struct key *key,
if (copy_to_user(xdr, (s), _l) != 0) \
goto fault; \
if (_l & 3 && \
- copy_to_user((u8 *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \
+ copy_to_user((u8 __user *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \
goto fault; \
xdr += (_l + 3) >> 2; \
} while(0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 648778aef1a2..3d43e4979f27 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -252,7 +252,8 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
- int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+ int err = gen_new_estimator(&p->tcfc_bstats, NULL,
+ &p->tcfc_rate_est,
&p->tcfc_lock, est);
if (err) {
kfree(p);
@@ -619,10 +620,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
- gnet_stats_copy_queue(&d, &p->tcfc_qstats) < 0)
+ gnet_stats_copy_queue(&d, NULL,
+ &p->tcfc_qstats,
+ p->tcfc_qstats.qlen) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 4f912c0e225b..eb48306033d9 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -218,10 +218,12 @@ static int mirred_device_event(struct notifier_block *unused,
if (event == NETDEV_UNREGISTER)
list_for_each_entry(m, &mirred_list, tcfm_list) {
+ spin_lock_bh(&m->tcf_lock);
if (m->tcfm_dev == dev) {
dev_put(dev);
m->tcfm_dev = NULL;
}
+ spin_unlock_bh(&m->tcf_lock);
}
return NOTIFY_DONE;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0566e4606a4a..69791ca77a05 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -178,7 +178,7 @@ override:
spin_lock_bh(&police->tcf_lock);
if (est) {
- err = gen_replace_estimator(&police->tcf_bstats,
+ err = gen_replace_estimator(&police->tcf_bstats, NULL,
&police->tcf_rate_est,
&police->tcf_lock, est);
if (err)
@@ -231,7 +231,7 @@ override:
if (ret != ACT_P_CREATED)
return ret;
- police->tcfp_t_c = ktime_to_ns(ktime_get());
+ police->tcfp_t_c = ktime_get_ns();
police->tcf_index = parm->index ? parm->index :
tcf_hash_new_index(hinfo);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -279,7 +279,7 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
return police->tcfp_result;
}
- now = ktime_to_ns(ktime_get());
+ now = ktime_get_ns();
toks = min_t(s64, now - police->tcfp_t_c,
police->tcfp_burst);
if (police->peak_present) {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 45527e6b52db..aad6a679fb13 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -117,7 +117,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
- spinlock_t *root_lock;
struct tcmsg *t;
u32 protocol;
u32 prio;
@@ -125,7 +124,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
u32 parent;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto **back, **chain;
+ struct tcf_proto __rcu **back;
+ struct tcf_proto __rcu **chain;
struct tcf_proto *tp;
const struct tcf_proto_ops *tp_ops;
const struct Qdisc_class_ops *cops;
@@ -197,7 +197,9 @@ replay:
goto errout;
/* Check the chain for existence of proto-tcf with this priority */
- for (back = chain; (tp = *back) != NULL; back = &tp->next) {
+ for (back = chain;
+ (tp = rtnl_dereference(*back)) != NULL;
+ back = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (!nprio ||
@@ -209,8 +211,6 @@ replay:
}
}
- root_lock = qdisc_root_sleeping_lock(q);
-
if (tp == NULL) {
/* Proto-tcf does not exist, create new one */
@@ -259,7 +259,8 @@ replay:
}
tp->ops = tp_ops;
tp->protocol = protocol;
- tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back));
+ tp->prio = nprio ? :
+ TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
tp->q = q;
tp->classify = tp_ops->classify;
tp->classid = parent;
@@ -280,9 +281,9 @@ replay:
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- spin_lock_bh(root_lock);
- *back = tp->next;
- spin_unlock_bh(root_lock);
+ struct tcf_proto *next = rtnl_dereference(tp->next);
+
+ RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
tcf_destroy(tp);
@@ -322,10 +323,8 @@ replay:
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
if (err == 0) {
if (tp_created) {
- spin_lock_bh(root_lock);
- tp->next = *back;
- *back = tp;
- spin_unlock_bh(root_lock);
+ RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
+ rcu_assign_pointer(*back, tp);
}
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
} else {
@@ -420,7 +419,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
int s_t;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto *tp, **chain;
+ struct tcf_proto *tp, __rcu **chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
unsigned long cl = 0;
const struct Qdisc_class_ops *cops;
@@ -454,7 +453,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
- for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+ for (tp = rtnl_dereference(*chain), t = 0;
+ tp; tp = rtnl_dereference(tp->next), t++) {
if (t < s_t)
continue;
if (TC_H_MAJ(tcm->tcm_info) &&
@@ -496,7 +496,7 @@ out:
return skb->len;
}
-void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
+void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
@@ -549,6 +549,7 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
tcf_tree_lock(tp);
list_splice_init(&dst->actions, &tmp);
list_splice(&src->actions, &dst->actions);
+ dst->type = src->type;
tcf_tree_unlock(tp);
tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
#endif
@@ -561,13 +562,14 @@ EXPORT_SYMBOL(tcf_exts_change);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
+ struct nlattr *nest;
+
if (exts->action && !list_empty(&exts->actions)) {
/*
* again for backward compatible mode - we want
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
- struct nlattr *nest;
if (exts->type != TCA_OLD_COMPAT) {
nest = nla_nest_start(skb, exts->action);
if (nest == NULL)
@@ -585,10 +587,14 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
nla_nest_end(skb, nest);
}
}
-#endif
return 0;
-nla_put_failure: __attribute__ ((unused))
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
return -1;
+#else
+ return 0;
+#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0ae1813e3e90..cd61280941e5 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -24,6 +24,7 @@
struct basic_head {
u32 hgenerator;
struct list_head flist;
+ struct rcu_head rcu;
};
struct basic_filter {
@@ -31,17 +32,19 @@ struct basic_filter {
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
struct tcf_result res;
+ struct tcf_proto *tp;
struct list_head link;
+ struct rcu_head rcu;
};
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
int r;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rcu_dereference_bh(tp->root);
struct basic_filter *f;
- list_for_each_entry(f, &head->flist, link) {
+ list_for_each_entry_rcu(f, &head->flist, link) {
if (!tcf_em_tree_match(skb, &f->ematches, NULL))
continue;
*res = f->res;
@@ -56,7 +59,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
{
unsigned long l = 0UL;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
if (head == NULL)
@@ -81,41 +84,43 @@ static int basic_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->flist);
- tp->root = head;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
-static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
+static void basic_delete_filter(struct rcu_head *head)
{
- tcf_unbind_filter(tp, &f->res);
- tcf_exts_destroy(tp, &f->exts);
- tcf_em_tree_destroy(tp, &f->ematches);
+ struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+ tcf_exts_destroy(&f->exts);
+ tcf_em_tree_destroy(&f->ematches);
kfree(f);
}
static void basic_destroy(struct tcf_proto *tp)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
list_for_each_entry_safe(f, n, &head->flist, link) {
- list_del(&f->link);
- basic_delete_filter(tp, f);
+ list_del_rcu(&f->link);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, basic_delete_filter);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *t, *f = (struct basic_filter *) arg;
list_for_each_entry(t, &head->flist, link)
if (t == f) {
- tcf_tree_lock(tp);
- list_del(&t->link);
- tcf_tree_unlock(tp);
- basic_delete_filter(tp, t);
+ list_del_rcu(&t->link);
+ tcf_unbind_filter(tp, &t->res);
+ call_rcu(&t->rcu, basic_delete_filter);
return 0;
}
@@ -152,10 +157,11 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
tcf_exts_change(tp, &f->exts, &e);
tcf_em_tree_change(tp, &f->ematches, &t);
+ f->tp = tp;
return 0;
errout:
- tcf_exts_destroy(tp, &e);
+ tcf_exts_destroy(&e);
return err;
}
@@ -164,9 +170,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca, unsigned long *arg, bool ovr)
{
int err;
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_BASIC_MAX + 1];
- struct basic_filter *f = (struct basic_filter *) *arg;
+ struct basic_filter *fold = (struct basic_filter *) *arg;
+ struct basic_filter *fnew;
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
@@ -176,22 +183,23 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- if (f != NULL) {
- if (handle && f->handle != handle)
+ if (fold != NULL) {
+ if (handle && fold->handle != handle)
return -EINVAL;
- return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
}
err = -ENOBUFS;
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (f == NULL)
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (fnew == NULL)
goto errout;
- tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
err = -EINVAL;
- if (handle)
- f->handle = handle;
- else {
+ if (handle) {
+ fnew->handle = handle;
+ } else if (fold) {
+ fnew->handle = fold->handle;
+ } else {
unsigned int i = 0x80000000;
do {
if (++head->hgenerator == 0x7FFFFFFF)
@@ -203,29 +211,32 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- f->handle = head->hgenerator;
+ fnew->handle = head->hgenerator;
}
- err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
if (err < 0)
goto errout;
- tcf_tree_lock(tp);
- list_add(&f->link, &head->flist);
- tcf_tree_unlock(tp);
- *arg = (unsigned long) f;
+ *arg = (unsigned long)fnew;
+
+ if (fold) {
+ list_replace_rcu(&fold->link, &fnew->link);
+ tcf_unbind_filter(tp, &fold->res);
+ call_rcu(&fold->rcu, basic_delete_filter);
+ } else {
+ list_add_rcu(&fnew->link, &head->flist);
+ }
return 0;
errout:
- if (*arg == 0UL && f)
- kfree(f);
-
+ kfree(fnew);
return err;
}
static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct basic_head *head = tp->root;
+ struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 13f64df2c710..eed49d1d0878 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,16 +27,19 @@ MODULE_DESCRIPTION("TC BPF based classifier");
struct cls_bpf_head {
struct list_head plist;
u32 hgen;
+ struct rcu_head rcu;
};
struct cls_bpf_prog {
- struct sk_filter *filter;
+ struct bpf_prog *filter;
struct sock_filter *bpf_ops;
struct tcf_exts exts;
struct tcf_result res;
struct list_head link;
u32 handle;
u16 bpf_len;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -49,12 +52,12 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
struct cls_bpf_prog *prog;
int ret;
- list_for_each_entry(prog, &head->plist, link) {
- int filter_res = SK_RUN_FILTER(prog->filter, skb);
+ list_for_each_entry_rcu(prog, &head->plist, link) {
+ int filter_res = BPF_PROG_RUN(prog->filter, skb);
if (filter_res == 0)
continue;
@@ -81,35 +84,39 @@ static int cls_bpf_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
- INIT_LIST_HEAD(&head->plist);
- tp->root = head;
+ INIT_LIST_HEAD_RCU(&head->plist);
+ rcu_assign_pointer(tp->root, head);
return 0;
}
static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
- tcf_unbind_filter(tp, &prog->res);
- tcf_exts_destroy(tp, &prog->exts);
+ tcf_exts_destroy(&prog->exts);
- sk_unattached_filter_destroy(prog->filter);
+ bpf_prog_destroy(prog->filter);
kfree(prog->bpf_ops);
kfree(prog);
}
+static void __cls_bpf_delete_prog(struct rcu_head *rcu)
+{
+ struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+ cls_bpf_delete_prog(prog->tp, prog);
+}
+
static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;
list_for_each_entry(prog, &head->plist, link) {
if (prog == todel) {
- tcf_tree_lock(tp);
- list_del(&prog->link);
- tcf_tree_unlock(tp);
-
- cls_bpf_delete_prog(tp, prog);
+ list_del_rcu(&prog->link);
+ tcf_unbind_filter(tp, &prog->res);
+ call_rcu(&prog->rcu, __cls_bpf_delete_prog);
return 0;
}
}
@@ -119,27 +126,29 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
static void cls_bpf_destroy(struct tcf_proto *tp)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
- list_del(&prog->link);
- cls_bpf_delete_prog(tp, prog);
+ list_del_rcu(&prog->link);
+ tcf_unbind_filter(tp, &prog->res);
+ call_rcu(&prog->rcu, __cls_bpf_delete_prog);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
unsigned long ret = 0UL;
if (head == NULL)
return 0UL;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
if (prog->handle == handle) {
ret = (unsigned long) prog;
break;
@@ -158,10 +167,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct sock_filter *bpf_ops, *bpf_old;
+ struct sock_filter *bpf_ops;
struct tcf_exts exts;
struct sock_fprog_kern tmp;
- struct sk_filter *fp, *fp_old;
+ struct bpf_prog *fp;
u16 bpf_size, bpf_len;
u32 classid;
int ret;
@@ -193,34 +202,23 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
tmp.len = bpf_len;
tmp.filter = bpf_ops;
- ret = sk_unattached_filter_create(&fp, &tmp);
+ ret = bpf_prog_create(&fp, &tmp);
if (ret)
goto errout_free;
- tcf_tree_lock(tp);
- fp_old = prog->filter;
- bpf_old = prog->bpf_ops;
-
prog->bpf_len = bpf_len;
prog->bpf_ops = bpf_ops;
prog->filter = fp;
prog->res.classid = classid;
- tcf_tree_unlock(tp);
tcf_bind_filter(tp, &prog->res, base);
tcf_exts_change(tp, &prog->exts, &exts);
- if (fp_old)
- sk_unattached_filter_destroy(fp_old);
- if (bpf_old)
- kfree(bpf_old);
-
return 0;
-
errout_free:
kfree(bpf_ops);
errout:
- tcf_exts_destroy(tp, &exts);
+ tcf_exts_destroy(&exts);
return ret;
}
@@ -244,9 +242,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct cls_bpf_head *head = tp->root;
- struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+ struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
+ struct cls_bpf_prog *prog;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -256,18 +255,19 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
return ret;
- if (prog != NULL) {
- if (handle && prog->handle != handle)
- return -EINVAL;
- return cls_bpf_modify_existing(net, tp, prog, base, tb,
- tca[TCA_RATE], ovr);
- }
-
prog = kzalloc(sizeof(*prog), GFP_KERNEL);
- if (prog == NULL)
+ if (!prog)
return -ENOBUFS;
tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+
+ if (oldprog) {
+ if (handle && oldprog->handle != handle) {
+ ret = -EINVAL;
+ goto errout;
+ }
+ }
+
if (handle == 0)
prog->handle = cls_bpf_grab_new_handle(tp, head);
else
@@ -281,16 +281,18 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout;
- tcf_tree_lock(tp);
- list_add(&prog->link, &head->plist);
- tcf_tree_unlock(tp);
+ if (oldprog) {
+ list_replace_rcu(&prog->link, &oldprog->link);
+ tcf_unbind_filter(tp, &oldprog->res);
+ call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
+ } else {
+ list_add_rcu(&prog->link, &head->plist);
+ }
*arg = (unsigned long) prog;
-
return 0;
errout:
- if (*arg == 0UL && prog)
- kfree(prog);
+ kfree(prog);
return ret;
}
@@ -339,10 +341,10 @@ nla_put_failure:
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
- list_for_each_entry(prog, &head->plist, link) {
+ list_for_each_entry_rcu(prog, &head->plist, link) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index cacf01bd04f0..d61a801222c1 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -22,17 +22,17 @@ struct cls_cgroup_head {
u32 handle;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
u32 classid;
- rcu_read_lock();
classid = task_cls_state(current)->classid;
- rcu_read_unlock();
/*
* Due to the nature of the classifier it is required to ignore all
@@ -80,13 +80,25 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void cls_cgroup_destroy_rcu(struct rcu_head *root)
+{
+ struct cls_cgroup_head *head = container_of(root,
+ struct cls_cgroup_head,
+ rcu);
+
+ tcf_exts_destroy(&head->exts);
+ tcf_em_tree_destroy(&head->ematches);
+ kfree(head);
+}
+
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
+ struct cls_cgroup_head *new;
struct tcf_ematch_tree t;
struct tcf_exts e;
int err;
@@ -94,53 +106,58 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (!tca[TCA_OPTIONS])
return -EINVAL;
- if (head == NULL) {
- if (!handle)
- return -EINVAL;
-
- head = kzalloc(sizeof(*head), GFP_KERNEL);
- if (head == NULL)
- return -ENOBUFS;
+ if (!head && !handle)
+ return -EINVAL;
- tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
- head->handle = handle;
+ if (head && handle != head->handle)
+ return -ENOENT;
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
- }
+ new = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!new)
+ return -ENOBUFS;
- if (handle != head->handle)
- return -ENOENT;
+ tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ if (head)
+ new->handle = head->handle;
+ else
+ new->handle = handle;
+ new->tp = tp;
err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
cgroup_policy);
if (err < 0)
- return err;
+ goto errout;
tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
- return err;
+ goto errout;
err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
- if (err < 0)
- return err;
+ if (err < 0) {
+ tcf_exts_destroy(&e);
+ goto errout;
+ }
- tcf_exts_change(tp, &head->exts, &e);
- tcf_em_tree_change(tp, &head->ematches, &t);
+ tcf_exts_change(tp, &new->exts, &e);
+ tcf_em_tree_change(tp, &new->ematches, &t);
+ rcu_assign_pointer(tp->root, new);
+ if (head)
+ call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
return 0;
+errout:
+ kfree(new);
+ return err;
}
static void cls_cgroup_destroy(struct tcf_proto *tp)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (head) {
- tcf_exts_destroy(tp, &head->exts);
- tcf_em_tree_destroy(tp, &head->ematches);
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
}
}
@@ -151,7 +168,7 @@ static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
@@ -167,7 +184,7 @@ skip:
static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_cgroup_head *head = tp->root;
+ struct cls_cgroup_head *head = rtnl_dereference(tp->root);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 35be16f7c192..4ac515f2a6ce 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -34,12 +34,14 @@
struct flow_head {
struct list_head filters;
+ struct rcu_head rcu;
};
struct flow_filter {
struct list_head list;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
+ struct tcf_proto *tp;
struct timer_list perturb_timer;
u32 perturb_period;
u32 handle;
@@ -54,6 +56,7 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hashrnd;
+ struct rcu_head rcu;
};
static inline u32 addr_fold(void *addr)
@@ -276,14 +279,14 @@ static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rcu_dereference_bh(tp->root);
struct flow_filter *f;
u32 keymask;
u32 classid;
unsigned int n, key;
int r;
- list_for_each_entry(f, &head->filters, list) {
+ list_for_each_entry_rcu(f, &head->filters, list) {
u32 keys[FLOW_KEY_MAX + 1];
struct flow_keys flow_keys;
@@ -346,13 +349,23 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
+static void flow_destroy_filter(struct rcu_head *head)
+{
+ struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+ del_timer_sync(&f->perturb_timer);
+ tcf_exts_destroy(&f->exts);
+ tcf_em_tree_destroy(&f->ematches);
+ kfree(f);
+}
+
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct flow_head *head = tp->root;
- struct flow_filter *f;
+ struct flow_head *head = rtnl_dereference(tp->root);
+ struct flow_filter *fold, *fnew;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FLOW_MAX + 1];
struct tcf_exts e;
@@ -401,20 +414,42 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err1;
- f = (struct flow_filter *)*arg;
- if (f != NULL) {
+ err = -ENOBUFS;
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ goto err2;
+
+ fold = (struct flow_filter *)*arg;
+ if (fold) {
err = -EINVAL;
- if (f->handle != handle && handle)
+ if (fold->handle != handle && handle)
goto err2;
- mode = f->mode;
+ /* Copy fold into fnew */
+ fnew->handle = fold->handle;
+ fnew->keymask = fold->keymask;
+ fnew->tp = fold->tp;
+
+ fnew->handle = fold->handle;
+ fnew->nkeys = fold->nkeys;
+ fnew->keymask = fold->keymask;
+ fnew->mode = fold->mode;
+ fnew->mask = fold->mask;
+ fnew->xor = fold->xor;
+ fnew->rshift = fold->rshift;
+ fnew->addend = fold->addend;
+ fnew->divisor = fold->divisor;
+ fnew->baseclass = fold->baseclass;
+ fnew->hashrnd = fold->hashrnd;
+
+ mode = fold->mode;
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
goto err2;
if (mode == FLOW_MODE_HASH)
- perturb_period = f->perturb_period;
+ perturb_period = fold->perturb_period;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
goto err2;
@@ -444,83 +479,72 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (TC_H_MIN(baseclass) == 0)
baseclass = TC_H_MAKE(baseclass, 1);
- err = -ENOBUFS;
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (f == NULL)
- goto err2;
-
- f->handle = handle;
- f->mask = ~0U;
- tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
-
- get_random_bytes(&f->hashrnd, 4);
- f->perturb_timer.function = flow_perturbation;
- f->perturb_timer.data = (unsigned long)f;
- init_timer_deferrable(&f->perturb_timer);
+ fnew->handle = handle;
+ fnew->mask = ~0U;
+ fnew->tp = tp;
+ get_random_bytes(&fnew->hashrnd, 4);
+ tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
}
- tcf_exts_change(tp, &f->exts, &e);
- tcf_em_tree_change(tp, &f->ematches, &t);
+ fnew->perturb_timer.function = flow_perturbation;
+ fnew->perturb_timer.data = (unsigned long)fnew;
+ init_timer_deferrable(&fnew->perturb_timer);
- tcf_tree_lock(tp);
+ tcf_exts_change(tp, &fnew->exts, &e);
+ tcf_em_tree_change(tp, &fnew->ematches, &t);
+
+ netif_keep_dst(qdisc_dev(tp->q));
if (tb[TCA_FLOW_KEYS]) {
- f->keymask = keymask;
- f->nkeys = nkeys;
+ fnew->keymask = keymask;
+ fnew->nkeys = nkeys;
}
- f->mode = mode;
+ fnew->mode = mode;
if (tb[TCA_FLOW_MASK])
- f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
+ fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
if (tb[TCA_FLOW_XOR])
- f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
+ fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
if (tb[TCA_FLOW_RSHIFT])
- f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
+ fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
if (tb[TCA_FLOW_ADDEND])
- f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
+ fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
if (tb[TCA_FLOW_DIVISOR])
- f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
+ fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
if (baseclass)
- f->baseclass = baseclass;
+ fnew->baseclass = baseclass;
- f->perturb_period = perturb_period;
- del_timer(&f->perturb_timer);
+ fnew->perturb_period = perturb_period;
if (perturb_period)
- mod_timer(&f->perturb_timer, jiffies + perturb_period);
+ mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
if (*arg == 0)
- list_add_tail(&f->list, &head->filters);
+ list_add_tail_rcu(&fnew->list, &head->filters);
+ else
+ list_replace_rcu(&fnew->list, &fold->list);
- tcf_tree_unlock(tp);
+ *arg = (unsigned long)fnew;
- *arg = (unsigned long)f;
+ if (fold)
+ call_rcu(&fold->rcu, flow_destroy_filter);
return 0;
err2:
- tcf_em_tree_destroy(tp, &t);
+ tcf_em_tree_destroy(&t);
+ kfree(fnew);
err1:
- tcf_exts_destroy(tp, &e);
+ tcf_exts_destroy(&e);
return err;
}
-static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
-{
- del_timer_sync(&f->perturb_timer);
- tcf_exts_destroy(tp, &f->exts);
- tcf_em_tree_destroy(tp, &f->ematches);
- kfree(f);
-}
-
static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
struct flow_filter *f = (struct flow_filter *)arg;
- tcf_tree_lock(tp);
- list_del(&f->list);
- tcf_tree_unlock(tp);
- flow_destroy_filter(tp, f);
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, flow_destroy_filter);
return 0;
}
@@ -532,28 +556,29 @@ static int flow_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->filters);
- tp->root = head;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
static void flow_destroy(struct tcf_proto *tp)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
list_for_each_entry_safe(f, next, &head->filters, list) {
- list_del(&f->list);
- flow_destroy_filter(tp, f);
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, flow_destroy_filter);
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry(f, &head->filters, list)
+ list_for_each_entry_rcu(f, &head->filters, list)
if (f->handle == handle)
return (unsigned long)f;
return 0;
@@ -626,10 +651,10 @@ nla_put_failure:
static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct flow_head *head = tp->root;
+ struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
- list_for_each_entry(f, &head->filters, list) {
+ list_for_each_entry_rcu(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
if (arg->fn(tp, (unsigned long)f, arg) < 0) {
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 861b03ccfed0..dbfdfd1f1a9f 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -33,17 +33,20 @@
struct fw_head {
u32 mask;
- struct fw_filter *ht[HTSIZE];
+ struct fw_filter __rcu *ht[HTSIZE];
+ struct rcu_head rcu;
};
struct fw_filter {
- struct fw_filter *next;
+ struct fw_filter __rcu *next;
u32 id;
struct tcf_result res;
#ifdef CONFIG_NET_CLS_IND
int ifindex;
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
static u32 fw_hash(u32 handle)
@@ -56,14 +59,16 @@ static u32 fw_hash(u32 handle)
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rcu_dereference_bh(tp->root);
struct fw_filter *f;
int r;
u32 id = skb->mark;
if (head != NULL) {
id &= head->mask;
- for (f = head->ht[fw_hash(id)]; f; f = f->next) {
+
+ for (f = rcu_dereference_bh(head->ht[fw_hash(id)]); f;
+ f = rcu_dereference_bh(f->next)) {
if (f->id == id) {
*res = f->res;
#ifdef CONFIG_NET_CLS_IND
@@ -92,13 +97,14 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
if (head == NULL)
return 0;
- for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
+ f = rtnl_dereference(head->ht[fw_hash(handle)]);
+ for (; f; f = rtnl_dereference(f->next)) {
if (f->id == handle)
return (unsigned long)f;
}
@@ -114,16 +120,17 @@ static int fw_init(struct tcf_proto *tp)
return 0;
}
-static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct rcu_head *head)
{
- tcf_unbind_filter(tp, &f->res);
- tcf_exts_destroy(tp, &f->exts);
+ struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+
+ tcf_exts_destroy(&f->exts);
kfree(f);
}
static void fw_destroy(struct tcf_proto *tp)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
@@ -131,29 +138,35 @@ static void fw_destroy(struct tcf_proto *tp)
return;
for (h = 0; h < HTSIZE; h++) {
- while ((f = head->ht[h]) != NULL) {
- head->ht[h] = f->next;
- fw_delete_filter(tp, f);
+ while ((f = rtnl_dereference(head->ht[h])) != NULL) {
+ RCU_INIT_POINTER(head->ht[h],
+ rtnl_dereference(f->next));
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fw_delete_filter);
}
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)arg;
- struct fw_filter **fp;
+ struct fw_filter __rcu **fp;
+ struct fw_filter *pfp;
if (head == NULL || f == NULL)
goto out;
- for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
- fw_delete_filter(tp, f);
+ fp = &head->ht[fw_hash(f->id)];
+
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
+ if (pfp == f) {
+ RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fw_delete_filter);
return 0;
}
}
@@ -171,7 +184,7 @@ static int
fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct tcf_exts e;
u32 mask;
int err;
@@ -210,7 +223,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
return 0;
errout:
- tcf_exts_destroy(tp, &e);
+ tcf_exts_destroy(&e);
return err;
}
@@ -220,7 +233,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *) *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
@@ -233,10 +246,45 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- if (f != NULL) {
+ if (f) {
+ struct fw_filter *pfp, *fnew;
+ struct fw_filter __rcu **fp;
+
if (f->id != handle && handle)
return -EINVAL;
- return fw_change_attrs(net, tp, f, tb, tca, base, ovr);
+
+ fnew = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
+
+ fnew->id = f->id;
+ fnew->res = f->res;
+#ifdef CONFIG_NET_CLS_IND
+ fnew->ifindex = f->ifindex;
+#endif /* CONFIG_NET_CLS_IND */
+ fnew->tp = f->tp;
+
+ tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+
+ err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
+ if (err < 0) {
+ kfree(fnew);
+ return err;
+ }
+
+ fp = &head->ht[fw_hash(fnew->id)];
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp))
+ if (pfp == f)
+ break;
+
+ RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
+ rcu_assign_pointer(*fp, fnew);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fw_delete_filter);
+
+ *arg = (unsigned long)fnew;
+ return err;
}
if (!handle)
@@ -252,9 +300,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
head->mask = mask;
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(tp->root, head);
}
f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
@@ -263,15 +309,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
f->id = handle;
+ f->tp = tp;
err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
if (err < 0)
goto errout;
- f->next = head->ht[fw_hash(handle)];
- tcf_tree_lock(tp);
- head->ht[fw_hash(handle)] = f;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
+ rcu_assign_pointer(head->ht[fw_hash(handle)], f);
*arg = (unsigned long)f;
return 0;
@@ -283,7 +328,7 @@ errout:
static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
int h;
if (head == NULL)
@@ -295,7 +340,8 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
for (h = 0; h < HTSIZE; h++) {
struct fw_filter *f;
- for (f = head->ht[h]; f; f = f->next) {
+ for (f = rtnl_dereference(head->ht[h]); f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -312,7 +358,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct fw_head *head = tp->root;
+ struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = (struct fw_filter *)fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index dd9fc2523c76..109a329b7198 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -29,25 +29,26 @@
* are mutually exclusive.
* 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
*/
-
struct route4_fastmap {
- struct route4_filter *filter;
- u32 id;
- int iif;
+ struct route4_filter *filter;
+ u32 id;
+ int iif;
};
struct route4_head {
- struct route4_fastmap fastmap[16];
- struct route4_bucket *table[256 + 1];
+ struct route4_fastmap fastmap[16];
+ struct route4_bucket __rcu *table[256 + 1];
+ struct rcu_head rcu;
};
struct route4_bucket {
/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
- struct route4_filter *ht[16 + 16 + 1];
+ struct route4_filter __rcu *ht[16 + 16 + 1];
+ struct rcu_head rcu;
};
struct route4_filter {
- struct route4_filter *next;
+ struct route4_filter __rcu *next;
u32 id;
int iif;
@@ -55,6 +56,8 @@ struct route4_filter {
struct tcf_exts exts;
u32 handle;
struct route4_bucket *bkt;
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
};
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -64,14 +67,13 @@ static inline int route4_fastmap_hash(u32 id, int iif)
return id & 0xF;
}
+static DEFINE_SPINLOCK(fastmap_lock);
static void
-route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+route4_reset_fastmap(struct route4_head *head)
{
- spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
-
- spin_lock_bh(root_lock);
+ spin_lock_bh(&fastmap_lock);
memset(head->fastmap, 0, sizeof(head->fastmap));
- spin_unlock_bh(root_lock);
+ spin_unlock_bh(&fastmap_lock);
}
static void
@@ -80,9 +82,12 @@ route4_set_fastmap(struct route4_head *head, u32 id, int iif,
{
int h = route4_fastmap_hash(id, iif);
+ /* fastmap updates must look atomic to aling id, iff, filter */
+ spin_lock_bh(&fastmap_lock);
head->fastmap[h].id = id;
head->fastmap[h].iif = iif;
head->fastmap[h].filter = f;
+ spin_unlock_bh(&fastmap_lock);
}
static inline int route4_hash_to(u32 id)
@@ -123,7 +128,7 @@ static inline int route4_hash_wild(void)
static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rcu_dereference_bh(tp->root);
struct dst_entry *dst;
struct route4_bucket *b;
struct route4_filter *f;
@@ -141,32 +146,43 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
iif = inet_iif(skb);
h = route4_fastmap_hash(id, iif);
+
+ spin_lock(&fastmap_lock);
if (id == head->fastmap[h].id &&
iif == head->fastmap[h].iif &&
(f = head->fastmap[h].filter) != NULL) {
- if (f == ROUTE4_FAILURE)
+ if (f == ROUTE4_FAILURE) {
+ spin_unlock(&fastmap_lock);
goto failure;
+ }
*res = f->res;
+ spin_unlock(&fastmap_lock);
return 0;
}
+ spin_unlock(&fastmap_lock);
h = route4_hash_to(id);
restart:
- b = head->table[h];
+ b = rcu_dereference_bh(head->table[h]);
if (b) {
- for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_from(id)]);
+ f;
+ f = rcu_dereference_bh(f->next))
if (f->id == id)
ROUTE4_APPLY_RESULT();
- for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_iif(iif)]);
+ f;
+ f = rcu_dereference_bh(f->next))
if (f->iif == iif)
ROUTE4_APPLY_RESULT();
- for (f = b->ht[route4_hash_wild()]; f; f = f->next)
+ for (f = rcu_dereference_bh(b->ht[route4_hash_wild()]);
+ f;
+ f = rcu_dereference_bh(f->next))
ROUTE4_APPLY_RESULT();
-
}
if (h < 256) {
h = 256;
@@ -213,7 +229,7 @@ static inline u32 from_hash(u32 id)
static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
struct route4_bucket *b;
struct route4_filter *f;
unsigned int h1, h2;
@@ -229,9 +245,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
if (h2 > 32)
return 0;
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (b) {
- for (f = b->ht[h2]; f; f = f->next)
+ for (f = rtnl_dereference(b->ht[h2]);
+ f;
+ f = rtnl_dereference(f->next))
if (f->handle == handle)
return (unsigned long)f;
}
@@ -248,16 +266,17 @@ static int route4_init(struct tcf_proto *tp)
}
static void
-route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
+route4_delete_filter(struct rcu_head *head)
{
- tcf_unbind_filter(tp, &f->res);
- tcf_exts_destroy(tp, &f->exts);
+ struct route4_filter *f = container_of(head, struct route4_filter, rcu);
+
+ tcf_exts_destroy(&f->exts);
kfree(f);
}
static void route4_destroy(struct tcf_proto *tp)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
@@ -266,28 +285,36 @@ static void route4_destroy(struct tcf_proto *tp)
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (b) {
for (h2 = 0; h2 <= 32; h2++) {
struct route4_filter *f;
- while ((f = b->ht[h2]) != NULL) {
- b->ht[h2] = f->next;
- route4_delete_filter(tp, f);
+ while ((f = rtnl_dereference(b->ht[h2])) != NULL) {
+ struct route4_filter *next;
+
+ next = rtnl_dereference(f->next);
+ RCU_INIT_POINTER(b->ht[h2], next);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, route4_delete_filter);
}
}
- kfree(b);
+ RCU_INIT_POINTER(head->table[h1], NULL);
+ kfree_rcu(b, rcu);
}
}
- kfree(head);
+ RCU_INIT_POINTER(tp->root, NULL);
+ kfree_rcu(head, rcu);
}
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct route4_head *head = tp->root;
- struct route4_filter **fp, *f = (struct route4_filter *)arg;
- unsigned int h = 0;
+ struct route4_head *head = rtnl_dereference(tp->root);
+ struct route4_filter *f = (struct route4_filter *)arg;
+ struct route4_filter __rcu **fp;
+ struct route4_filter *nf;
struct route4_bucket *b;
+ unsigned int h = 0;
int i;
if (!head || !f)
@@ -296,27 +323,36 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
h = f->handle;
b = f->bkt;
- for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
-
- route4_reset_fastmap(tp->q, head, f->id);
- route4_delete_filter(tp, f);
-
- /* Strip tree */
-
- for (i = 0; i <= 32; i++)
- if (b->ht[i])
+ fp = &b->ht[from_hash(h >> 16)];
+ for (nf = rtnl_dereference(*fp); nf;
+ fp = &nf->next, nf = rtnl_dereference(*fp)) {
+ if (nf == f) {
+ /* unlink it */
+ RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
+
+ /* Remove any fastmap lookups that might ref filter
+ * notice we unlink'd the filter so we can't get it
+ * back in the fastmap.
+ */
+ route4_reset_fastmap(head);
+
+ /* Delete it */
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, route4_delete_filter);
+
+ /* Strip RTNL protected tree */
+ for (i = 0; i <= 32; i++) {
+ struct route4_filter *rt;
+
+ rt = rtnl_dereference(b->ht[i]);
+ if (rt)
return 0;
+ }
/* OK, session has no flows */
- tcf_tree_lock(tp);
- head->table[to_hash(h)] = NULL;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(head->table[to_hash(h)], NULL);
+ kfree_rcu(b, rcu);
- kfree(b);
return 0;
}
}
@@ -380,26 +416,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
}
h1 = to_hash(nhandle);
- b = head->table[h1];
+ b = rtnl_dereference(head->table[h1]);
if (!b) {
err = -ENOBUFS;
b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL)
goto errout;
- tcf_tree_lock(tp);
- head->table[h1] = b;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(head->table[h1], b);
} else {
unsigned int h2 = from_hash(nhandle >> 16);
err = -EEXIST;
- for (fp = b->ht[h2]; fp; fp = fp->next)
+ for (fp = rtnl_dereference(b->ht[h2]);
+ fp;
+ fp = rtnl_dereference(fp->next))
if (fp->handle == f->handle)
goto errout;
}
- tcf_tree_lock(tp);
if (tb[TCA_ROUTE4_TO])
f->id = to;
@@ -410,7 +445,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
f->handle = nhandle;
f->bkt = b;
- tcf_tree_unlock(tp);
+ f->tp = tp;
if (tb[TCA_ROUTE4_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
@@ -421,7 +456,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
return 0;
errout:
- tcf_exts_destroy(tp, &e);
+ tcf_exts_destroy(&e);
return err;
}
@@ -431,14 +466,15 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct route4_head *head = tp->root;
- struct route4_filter *f, *f1, **fp;
+ struct route4_head *head = rtnl_dereference(tp->root);
+ struct route4_filter __rcu **fp;
+ struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
- u32 old_handle = 0;
int err;
+ bool new = true;
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -447,70 +483,73 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- f = (struct route4_filter *)*arg;
- if (f) {
- if (f->handle != handle && handle)
+ fold = (struct route4_filter *)*arg;
+ if (fold && handle && fold->handle != handle)
return -EINVAL;
- if (f->bkt)
- old_handle = f->handle;
-
- err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 0, ovr);
- if (err < 0)
- return err;
-
- goto reinsert;
- }
-
err = -ENOBUFS;
if (head == NULL) {
head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
if (head == NULL)
goto errout;
-
- tcf_tree_lock(tp);
- tp->root = head;
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(tp->root, head);
}
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
- if (f == NULL)
+ if (!f)
goto errout;
tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ if (fold) {
+ f->id = fold->id;
+ f->iif = fold->iif;
+ f->res = fold->res;
+ f->handle = fold->handle;
+
+ f->tp = fold->tp;
+ f->bkt = fold->bkt;
+ new = false;
+ }
+
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 1, ovr);
+ tca[TCA_RATE], new, ovr);
if (err < 0)
goto errout;
-reinsert:
h = from_hash(f->handle >> 16);
- for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
+ fp = &f->bkt->ht[h];
+ for (pfp = rtnl_dereference(*fp);
+ (f1 = rtnl_dereference(*fp)) != NULL;
+ fp = &f1->next)
if (f->handle < f1->handle)
break;
- f->next = f1;
- tcf_tree_lock(tp);
- *fp = f;
+ netif_keep_dst(qdisc_dev(tp->q));
+ rcu_assign_pointer(f->next, f1);
+ rcu_assign_pointer(*fp, f);
- if (old_handle && f->handle != old_handle) {
- th = to_hash(old_handle);
- h = from_hash(old_handle >> 16);
- b = head->table[th];
+ if (fold && fold->handle && f->handle != fold->handle) {
+ th = to_hash(fold->handle);
+ h = from_hash(fold->handle >> 16);
+ b = rtnl_dereference(head->table[th]);
if (b) {
- for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
+ fp = &b->ht[h];
+ for (pfp = rtnl_dereference(*fp); pfp;
+ fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
+ if (pfp == f) {
*fp = f->next;
break;
}
}
}
}
- tcf_tree_unlock(tp);
- route4_reset_fastmap(tp->q, head, f->id);
+ route4_reset_fastmap(head);
*arg = (unsigned long)f;
+ if (fold) {
+ tcf_unbind_filter(tp, &fold->res);
+ call_rcu(&fold->rcu, route4_delete_filter);
+ }
return 0;
errout:
@@ -520,7 +559,7 @@ errout:
static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct route4_head *head = tp->root;
+ struct route4_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
if (head == NULL)
@@ -530,13 +569,15 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
return;
for (h = 0; h <= 256; h++) {
- struct route4_bucket *b = head->table[h];
+ struct route4_bucket *b = rtnl_dereference(head->table[h]);
if (b) {
for (h1 = 0; h1 <= 32; h1++) {
struct route4_filter *f;
- for (f = b->ht[h1]; f; f = f->next) {
+ for (f = rtnl_dereference(b->ht[h1]);
+ f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 1020e233a5d6..6bb55f277a5a 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -70,31 +70,34 @@ struct rsvp_head {
u32 tmap[256/32];
u32 hgenerator;
u8 tgenerator;
- struct rsvp_session *ht[256];
+ struct rsvp_session __rcu *ht[256];
+ struct rcu_head rcu;
};
struct rsvp_session {
- struct rsvp_session *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
+ struct rsvp_session __rcu *next;
+ __be32 dst[RSVP_DST_LEN];
+ struct tc_rsvp_gpi dpi;
+ u8 protocol;
+ u8 tunnelid;
/* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter *ht[16 + 1];
+ struct rsvp_filter __rcu *ht[16 + 1];
+ struct rcu_head rcu;
};
struct rsvp_filter {
- struct rsvp_filter *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
+ struct rsvp_filter __rcu *next;
+ __be32 src[RSVP_DST_LEN];
+ struct tc_rsvp_gpi spi;
+ u8 tunnelhdr;
- struct tcf_result res;
- struct tcf_exts exts;
+ struct tcf_result res;
+ struct tcf_exts exts;
- u32 handle;
- struct rsvp_session *sess;
+ u32 handle;
+ struct rsvp_session *sess;
+ struct rcu_head rcu;
};
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -128,7 +131,7 @@ static inline unsigned int hash_src(__be32 *src)
static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
+ struct rsvp_head *head = rcu_dereference_bh(tp->root);
struct rsvp_session *s;
struct rsvp_filter *f;
unsigned int h1, h2;
@@ -169,7 +172,8 @@ restart:
h1 = hash_dst(dst, protocol, tunnelid);
h2 = hash_src(src);
- for (s = sht[h1]; s; s = s->next) {
+ for (s = rcu_dereference_bh(head->ht[h1]); s;
+ s = rcu_dereference_bh(s->next)) {
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
protocol == s->protocol &&
!(s->dpi.mask &
@@ -181,7 +185,8 @@ restart:
#endif
tunnelid == s->tunnelid) {
- for (f = s->ht[h2]; f; f = f->next) {
+ for (f = rcu_dereference_bh(s->ht[h2]); f;
+ f = rcu_dereference_bh(f->next)) {
if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
!(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
#if RSVP_DST_LEN == 4
@@ -205,7 +210,8 @@ matched:
}
/* And wildcard bucket... */
- for (f = s->ht[16]; f; f = f->next) {
+ for (f = rcu_dereference_bh(s->ht[16]); f;
+ f = rcu_dereference_bh(f->next)) {
*res = f->res;
RSVP_APPLY_RESULT();
goto matched;
@@ -216,9 +222,36 @@ matched:
return -1;
}
+static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
+{
+ struct rsvp_head *head = rtnl_dereference(tp->root);
+ struct rsvp_session *s;
+ struct rsvp_filter __rcu **ins;
+ struct rsvp_filter *pins;
+ unsigned int h1 = h & 0xFF;
+ unsigned int h2 = (h >> 8) & 0xFF;
+
+ for (s = rtnl_dereference(head->ht[h1]); s;
+ s = rtnl_dereference(s->next)) {
+ for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
+ ins = &pins->next, pins = rtnl_dereference(*ins)) {
+ if (pins->handle == h) {
+ RCU_INIT_POINTER(n->next, pins->next);
+ rcu_assign_pointer(*ins, n);
+ return;
+ }
+ }
+ }
+
+ /* Something went wrong if we are trying to replace a non-existant
+ * node. Mind as well halt instead of silently failing.
+ */
+ BUG_ON(1);
+}
+
static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
{
- struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_session *s;
struct rsvp_filter *f;
unsigned int h1 = handle & 0xFF;
@@ -227,8 +260,10 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
if (h2 > 16)
return 0;
- for (s = sht[h1]; s; s = s->next) {
- for (f = s->ht[h2]; f; f = f->next) {
+ for (s = rtnl_dereference(head->ht[h1]); s;
+ s = rtnl_dereference(s->next)) {
+ for (f = rtnl_dereference(s->ht[h2]); f;
+ f = rtnl_dereference(f->next)) {
if (f->handle == handle)
return (unsigned long)f;
}
@@ -246,7 +281,7 @@ static int rsvp_init(struct tcf_proto *tp)
data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
if (data) {
- tp->root = data;
+ rcu_assign_pointer(tp->root, data);
return 0;
}
return -ENOBUFS;
@@ -256,54 +291,55 @@ static void
rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
tcf_unbind_filter(tp, &f->res);
- tcf_exts_destroy(tp, &f->exts);
- kfree(f);
+ tcf_exts_destroy(&f->exts);
+ kfree_rcu(f, rcu);
}
static void rsvp_destroy(struct tcf_proto *tp)
{
- struct rsvp_head *data = xchg(&tp->root, NULL);
- struct rsvp_session **sht;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
return;
- sht = data->ht;
+ RCU_INIT_POINTER(tp->root, NULL);
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
- while ((s = sht[h1]) != NULL) {
- sht[h1] = s->next;
+ while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
+ RCU_INIT_POINTER(data->ht[h1], s->next);
for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
- while ((f = s->ht[h2]) != NULL) {
- s->ht[h2] = f->next;
+ while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
+ rcu_assign_pointer(s->ht[h2], f->next);
rsvp_delete_filter(tp, f);
}
}
- kfree(s);
+ kfree_rcu(s, rcu);
}
}
- kfree(data);
+ kfree_rcu(data, rcu);
}
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
+ struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_filter __rcu **fp;
unsigned int h = f->handle;
- struct rsvp_session **sp;
- struct rsvp_session *s = f->sess;
+ struct rsvp_session __rcu **sp;
+ struct rsvp_session *nsp, *s = f->sess;
int i;
- for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
- if (*fp == f) {
- tcf_tree_lock(tp);
- *fp = f->next;
- tcf_tree_unlock(tp);
+ fp = &s->ht[(h >> 8) & 0xFF];
+ for (nfp = rtnl_dereference(*fp); nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
+ if (nfp == f) {
+ RCU_INIT_POINTER(*fp, f->next);
rsvp_delete_filter(tp, f);
/* Strip tree */
@@ -313,14 +349,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
return 0;
/* OK, session has no flows */
- for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
- *sp; sp = &(*sp)->next) {
- if (*sp == s) {
- tcf_tree_lock(tp);
- *sp = s->next;
- tcf_tree_unlock(tp);
-
- kfree(s);
+ sp = &head->ht[h & 0xFF];
+ for (nsp = rtnl_dereference(*sp); nsp;
+ sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
+ if (nsp == s) {
+ RCU_INIT_POINTER(*sp, s->next);
+ kfree_rcu(s, rcu);
return 0;
}
}
@@ -333,7 +367,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
{
- struct rsvp_head *data = tp->root;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
int i = 0xFFFF;
while (i-- > 0) {
@@ -361,7 +395,7 @@ static int tunnel_bts(struct rsvp_head *data)
static void tunnel_recycle(struct rsvp_head *data)
{
- struct rsvp_session **sht = data->ht;
+ struct rsvp_session __rcu **sht = data->ht;
u32 tmap[256/32];
int h1, h2;
@@ -369,11 +403,13 @@ static void tunnel_recycle(struct rsvp_head *data)
for (h1 = 0; h1 < 256; h1++) {
struct rsvp_session *s;
- for (s = sht[h1]; s; s = s->next) {
+ for (s = rtnl_dereference(sht[h1]); s;
+ s = rtnl_dereference(s->next)) {
for (h2 = 0; h2 <= 16; h2++) {
struct rsvp_filter *f;
- for (f = s->ht[h2]; f; f = f->next) {
+ for (f = rtnl_dereference(s->ht[h2]); f;
+ f = rtnl_dereference(f->next)) {
if (f->tunnelhdr == 0)
continue;
data->tgenerator = f->res.classid;
@@ -417,9 +453,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct nlattr **tca,
unsigned long *arg, bool ovr)
{
- struct rsvp_head *data = tp->root;
- struct rsvp_filter *f, **fp;
- struct rsvp_session *s, **sp;
+ struct rsvp_head *data = rtnl_dereference(tp->root);
+ struct rsvp_filter *f, *nfp;
+ struct rsvp_filter __rcu **fp;
+ struct rsvp_session *nsp, *s;
+ struct rsvp_session __rcu **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
@@ -443,15 +481,26 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
f = (struct rsvp_filter *)*arg;
if (f) {
/* Node exists: adjust only classid */
+ struct rsvp_filter *n;
if (f->handle != handle && handle)
goto errout2;
+
+ n = kmemdup(f, sizeof(*f), GFP_KERNEL);
+ if (!n) {
+ err = -ENOMEM;
+ goto errout2;
+ }
+
+ tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+
if (tb[TCA_RSVP_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- tcf_bind_filter(tp, &f->res, base);
+ n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
+ tcf_bind_filter(tp, &n->res, base);
}
- tcf_exts_change(tp, &f->exts, &e);
+ tcf_exts_change(tp, &n->exts, &e);
+ rsvp_replace(tp, n, handle);
return 0;
}
@@ -499,7 +548,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
+ for (sp = &data->ht[h1];
+ (s = rtnl_dereference(*sp)) != NULL;
+ sp = &s->next) {
if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
pinfo && pinfo->protocol == s->protocol &&
memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -521,12 +572,16 @@ insert:
tcf_exts_change(tp, &f->exts, &e);
- for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
- if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
+ fp = &s->ht[h2];
+ for (nfp = rtnl_dereference(*fp); nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
+ __u32 mask = nfp->spi.mask & f->spi.mask;
+
+ if (mask != f->spi.mask)
break;
- f->next = *fp;
- wmb();
- *fp = f;
+ }
+ RCU_INIT_POINTER(f->next, nfp);
+ rcu_assign_pointer(*fp, f);
*arg = (unsigned long)f;
return 0;
@@ -546,26 +601,27 @@ insert:
s->protocol = pinfo->protocol;
s->tunnelid = pinfo->tunnelid;
}
- for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
- if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
+ sp = &data->ht[h1];
+ for (nsp = rtnl_dereference(*sp); nsp;
+ sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
+ if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
break;
}
- s->next = *sp;
- wmb();
- *sp = s;
+ RCU_INIT_POINTER(s->next, nsp);
+ rcu_assign_pointer(*sp, s);
goto insert;
errout:
kfree(f);
errout2:
- tcf_exts_destroy(tp, &e);
+ tcf_exts_destroy(&e);
return err;
}
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
- struct rsvp_head *head = tp->root;
+ struct rsvp_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
if (arg->stop)
@@ -574,11 +630,13 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
for (h = 0; h < 256; h++) {
struct rsvp_session *s;
- for (s = head->ht[h]; s; s = s->next) {
+ for (s = rtnl_dereference(head->ht[h]); s;
+ s = rtnl_dereference(s->next)) {
for (h1 = 0; h1 <= 16; h1++) {
struct rsvp_filter *f;
- for (f = s->ht[h1]; f; f = f->next) {
+ for (f = rtnl_dereference(s->ht[h1]); f;
+ f = rtnl_dereference(f->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index c721cd4a469f..30f10fb07f4a 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -32,19 +32,21 @@ struct tcindex_filter_result {
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
- struct tcindex_filter *next;
+ struct tcindex_filter __rcu *next;
+ struct rcu_head rcu;
};
struct tcindex_data {
struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
- struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
- NULL if unused */
+ struct tcindex_filter __rcu **h; /* imperfect hash; */
+ struct tcf_proto *tp;
u16 mask; /* AND key with mask */
- int shift; /* shift ANDed key to the right */
- int hash; /* hash table size; 0 if undefined */
- int alloc_hash; /* allocated size */
- int fall_through; /* 0: only classify if explicit match */
+ u32 shift; /* shift ANDed key to the right */
+ u32 hash; /* hash table size; 0 if undefined */
+ u32 alloc_hash; /* allocated size */
+ u32 fall_through; /* 0: only classify if explicit match */
+ struct rcu_head rcu;
};
static inline int
@@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r)
static struct tcindex_filter_result *
tcindex_lookup(struct tcindex_data *p, u16 key)
{
- struct tcindex_filter *f;
+ if (p->perfect) {
+ struct tcindex_filter_result *f = p->perfect + key;
+
+ return tcindex_filter_is_set(f) ? f : NULL;
+ } else if (p->h) {
+ struct tcindex_filter __rcu **fp;
+ struct tcindex_filter *f;
- if (p->perfect)
- return tcindex_filter_is_set(p->perfect + key) ?
- p->perfect + key : NULL;
- else if (p->h) {
- for (f = p->h[key % p->hash]; f; f = f->next)
+ fp = &p->h[key % p->hash];
+ for (f = rcu_dereference_bh_rtnl(*fp);
+ f;
+ fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
if (f->key == key)
return &f->result;
}
@@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key)
static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rcu_dereference_bh(tp->root);
struct tcindex_filter_result *f;
int key = (skb->tc_index & p->mask) >> p->shift;
@@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
@@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp)
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
- tp->root = p;
+ rcu_assign_pointer(tp->root, p);
return 0;
}
-
static int
-__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
+tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+ struct tcindex_filter __rcu **walk;
struct tcindex_filter *f = NULL;
- pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
+ pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
if (p->perfect) {
if (!r->res.class)
return -ENOENT;
} else {
int i;
- struct tcindex_filter **walk = NULL;
- for (i = 0; i < p->hash; i++)
- for (walk = p->h+i; *walk; walk = &(*walk)->next)
- if (&(*walk)->result == r)
+ for (i = 0; i < p->hash; i++) {
+ walk = p->h + i;
+ for (f = rtnl_dereference(*walk); f;
+ walk = &f->next, f = rtnl_dereference(*walk)) {
+ if (&f->result == r)
goto found;
+ }
+ }
return -ENOENT;
found:
- f = *walk;
- if (lock)
- tcf_tree_lock(tp);
- *walk = f->next;
- if (lock)
- tcf_tree_unlock(tp);
+ rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
- tcf_exts_destroy(tp, &r->exts);
- kfree(f);
+ tcf_exts_destroy(&r->exts);
+ if (f)
+ kfree_rcu(f, rcu);
return 0;
}
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static int tcindex_destroy_element(struct tcf_proto *tp,
+ unsigned long arg,
+ struct tcf_walker *walker)
{
- return __tcindex_delete(tp, arg, 1);
+ return tcindex_delete(tp, arg);
+}
+
+static void __tcindex_destroy(struct rcu_head *head)
+{
+ struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+ kfree(p->perfect);
+ kfree(p->h);
+ kfree(p);
}
static inline int
@@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r)
tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
}
+static void __tcindex_partial_destroy(struct rcu_head *head)
+{
+ struct tcindex_data *p = container_of(head, struct tcindex_data, rcu);
+
+ kfree(p->perfect);
+ kfree(p);
+}
+
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
@@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
int err, balloc = 0;
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_filter_result cr;
- struct tcindex_data cp;
+ struct tcindex_data *cp, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_exts e;
@@ -212,89 +237,130 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (err < 0)
return err;
- memcpy(&cp, p, sizeof(cp));
- tcindex_filter_result_init(&new_filter_result);
+ err = -ENOMEM;
+ /* tcindex_data attributes must look atomic to classifier/lookup so
+ * allocate new tcindex data and RCU assign it onto root. Keeping
+ * perfect hash and hash pointers from old data.
+ */
+ cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+ if (!cp)
+ goto errout;
+
+ cp->mask = p->mask;
+ cp->shift = p->shift;
+ cp->hash = p->hash;
+ cp->alloc_hash = p->alloc_hash;
+ cp->fall_through = p->fall_through;
+ cp->tp = tp;
+ if (p->perfect) {
+ int i;
+
+ cp->perfect = kmemdup(p->perfect,
+ sizeof(*r) * cp->hash, GFP_KERNEL);
+ if (!cp->perfect)
+ goto errout;
+ for (i = 0; i < cp->hash; i++)
+ tcf_exts_init(&cp->perfect[i].exts,
+ TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ balloc = 1;
+ }
+ cp->h = p->h;
+
+ tcindex_filter_result_init(&new_filter_result);
tcindex_filter_result_init(&cr);
if (old_r)
cr.res = r->res;
if (tb[TCA_TCINDEX_HASH])
- cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
if (tb[TCA_TCINDEX_MASK])
- cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
if (tb[TCA_TCINDEX_SHIFT])
- cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
err = -EBUSY;
+
/* Hash already allocated, make sure that we still meet the
* requirements for the allocated hash.
*/
- if (cp.perfect) {
- if (!valid_perfect_hash(&cp) ||
- cp.hash > cp.alloc_hash)
- goto errout;
- } else if (cp.h && cp.hash != cp.alloc_hash)
- goto errout;
+ if (cp->perfect) {
+ if (!valid_perfect_hash(cp) ||
+ cp->hash > cp->alloc_hash)
+ goto errout_alloc;
+ } else if (cp->h && cp->hash != cp->alloc_hash) {
+ goto errout_alloc;
+ }
err = -EINVAL;
if (tb[TCA_TCINDEX_FALL_THROUGH])
- cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
+ cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp.hash) {
+ if (!cp->hash) {
/* Hash not specified, use perfect hash if the upper limit
* of the hashing index is below the threshold.
*/
- if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
- cp.hash = (cp.mask >> cp.shift) + 1;
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
else
- cp.hash = DEFAULT_HASH_SIZE;
+ cp->hash = DEFAULT_HASH_SIZE;
}
- if (!cp.perfect && !cp.h)
- cp.alloc_hash = cp.hash;
+ if (!cp->perfect && !cp->h)
+ cp->alloc_hash = cp->hash;
/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
* but then, we'd fail handles that may become valid after some future
* mask change. While this is extremely unlikely to ever matter,
* the check below is safer (and also more backwards-compatible).
*/
- if (cp.perfect || valid_perfect_hash(&cp))
- if (handle >= cp.alloc_hash)
- goto errout;
+ if (cp->perfect || valid_perfect_hash(cp))
+ if (handle >= cp->alloc_hash)
+ goto errout_alloc;
err = -ENOMEM;
- if (!cp.perfect && !cp.h) {
- if (valid_perfect_hash(&cp)) {
+ if (!cp->perfect && !cp->h) {
+ if (valid_perfect_hash(cp)) {
int i;
- cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
- if (!cp.perfect)
- goto errout;
- for (i = 0; i < cp.hash; i++)
- tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT,
+ cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
+ if (!cp->perfect)
+ goto errout_alloc;
+ for (i = 0; i < cp->hash; i++)
+ tcf_exts_init(&cp->perfect[i].exts,
+ TCA_TCINDEX_ACT,
TCA_TCINDEX_POLICE);
balloc = 1;
} else {
- cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
- if (!cp.h)
- goto errout;
+ struct tcindex_filter __rcu **hash;
+
+ hash = kcalloc(cp->hash,
+ sizeof(struct tcindex_filter *),
+ GFP_KERNEL);
+
+ if (!hash)
+ goto errout_alloc;
+
+ cp->h = hash;
balloc = 2;
}
}
- if (cp.perfect)
- r = cp.perfect + handle;
+ if (cp->perfect)
+ r = cp->perfect + handle;
else
- r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
+ r = tcindex_lookup(cp, handle) ? : &new_filter_result;
if (r == &new_filter_result) {
f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
goto errout_alloc;
+ f->key = handle;
+ tcindex_filter_result_init(&f->result);
+ f->next = NULL;
}
if (tb[TCA_TCINDEX_CLASSID]) {
@@ -307,34 +373,40 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
else
tcf_exts_change(tp, &cr.exts, &e);
- tcf_tree_lock(tp);
if (old_r && old_r != r)
tcindex_filter_result_init(old_r);
- memcpy(p, &cp, sizeof(cp));
+ oldp = p;
r->res = cr.res;
+ rcu_assign_pointer(tp->root, cp);
if (r == &new_filter_result) {
- struct tcindex_filter **fp;
+ struct tcindex_filter *nfp;
+ struct tcindex_filter __rcu **fp;
- f->key = handle;
- f->result = new_filter_result;
- f->next = NULL;
- for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next)
- /* nothing */;
- *fp = f;
+ tcf_exts_change(tp, &f->result.exts, &r->exts);
+
+ fp = cp->h + (handle % cp->hash);
+ for (nfp = rtnl_dereference(*fp);
+ nfp;
+ fp = &nfp->next, nfp = rtnl_dereference(*fp))
+ ; /* nothing */
+
+ rcu_assign_pointer(*fp, f);
}
- tcf_tree_unlock(tp);
+ if (oldp)
+ call_rcu(&oldp->rcu, __tcindex_partial_destroy);
return 0;
errout_alloc:
if (balloc == 1)
- kfree(cp.perfect);
+ kfree(cp->perfect);
else if (balloc == 2)
- kfree(cp.h);
+ kfree(cp->h);
errout:
- tcf_exts_destroy(tp, &e);
+ kfree(cp);
+ tcf_exts_destroy(&e);
return err;
}
@@ -345,7 +417,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
int err;
@@ -364,10 +436,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
tca[TCA_RATE], ovr);
}
-
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
int i;
@@ -390,8 +461,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
if (!p->h)
return;
for (i = 0; i < p->hash; i++) {
- for (f = p->h[i]; f; f = next) {
- next = f->next;
+ for (f = rtnl_dereference(p->h[i]); f; f = next) {
+ next = rtnl_dereference(f->next);
if (walker->count >= walker->skip) {
if (walker->fn(tp, (unsigned long) &f->result,
walker) < 0) {
@@ -404,35 +475,26 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-
-static int tcindex_destroy_element(struct tcf_proto *tp,
- unsigned long arg, struct tcf_walker *walker)
-{
- return __tcindex_delete(tp, arg, 0);
-}
-
-
static void tcindex_destroy(struct tcf_proto *tp)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
walker.count = 0;
walker.skip = 0;
- walker.fn = &tcindex_destroy_element;
+ walker.fn = tcindex_destroy_element;
tcindex_walk(tp, &walker);
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
- tp->root = NULL;
+
+ RCU_INIT_POINTER(tp->root, NULL);
+ call_rcu(&p->rcu, __tcindex_destroy);
}
static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct tcindex_data *p = tp->root;
+ struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -455,15 +517,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
nla_nest_end(skb, nest);
} else {
if (p->perfect) {
- t->tcm_handle = r-p->perfect;
+ t->tcm_handle = r - p->perfect;
} else {
struct tcindex_filter *f;
+ struct tcindex_filter __rcu **fp;
int i;
t->tcm_handle = 0;
for (i = 0; !t->tcm_handle && i < p->hash; i++) {
- for (f = p->h[i]; !t->tcm_handle && f;
- f = f->next) {
+ fp = &p->h[i];
+ for (f = rtnl_dereference(*fp);
+ !t->tcm_handle && f;
+ fp = &f->next, f = rtnl_dereference(*fp)) {
if (&f->result == r)
t->tcm_handle = f->key;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 70c0be8d0121..0472909bb014 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -36,6 +36,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/percpu.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
@@ -44,40 +45,49 @@
#include <net/pkt_cls.h>
struct tc_u_knode {
- struct tc_u_knode *next;
+ struct tc_u_knode __rcu *next;
u32 handle;
- struct tc_u_hnode *ht_up;
+ struct tc_u_hnode __rcu *ht_up;
struct tcf_exts exts;
#ifdef CONFIG_NET_CLS_IND
int ifindex;
#endif
u8 fshift;
struct tcf_result res;
- struct tc_u_hnode *ht_down;
+ struct tc_u_hnode __rcu *ht_down;
#ifdef CONFIG_CLS_U32_PERF
- struct tc_u32_pcnt *pf;
+ struct tc_u32_pcnt __percpu *pf;
#endif
#ifdef CONFIG_CLS_U32_MARK
- struct tc_u32_mark mark;
+ u32 val;
+ u32 mask;
+ u32 __percpu *pcpu_success;
#endif
+ struct tcf_proto *tp;
+ struct rcu_head rcu;
+ /* The 'sel' field MUST be the last field in structure to allow for
+ * tc_u32_keys allocated at end of structure.
+ */
struct tc_u32_sel sel;
};
struct tc_u_hnode {
- struct tc_u_hnode *next;
+ struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
- struct tc_u_knode *ht[1];
+ struct tc_u_knode __rcu *ht[1];
+ struct rcu_head rcu;
};
struct tc_u_common {
- struct tc_u_hnode *hlist;
+ struct tc_u_hnode __rcu *hlist;
struct Qdisc *q;
int refcnt;
u32 hgenerator;
+ struct rcu_head rcu;
};
static inline unsigned int u32_hash_fold(__be32 key,
@@ -96,7 +106,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
unsigned int off;
} stack[TC_U32_MAXDEPTH];
- struct tc_u_hnode *ht = tp->root;
+ struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
unsigned int off = skb_network_offset(skb);
struct tc_u_knode *n;
int sdepth = 0;
@@ -108,23 +118,23 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct
int i, r;
next_ht:
- n = ht->ht[sel];
+ n = rcu_dereference_bh(ht->ht[sel]);
next_knode:
if (n) {
struct tc_u32_key *key = n->sel.keys;
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rcnt += 1;
+ __this_cpu_inc(n->pf->rcnt);
j = 0;
#endif
#ifdef CONFIG_CLS_U32_MARK
- if ((skb->mark & n->mark.mask) != n->mark.val) {
- n = n->next;
+ if ((skb->mark & n->mask) != n->val) {
+ n = rcu_dereference_bh(n->next);
goto next_knode;
} else {
- n->mark.success++;
+ __this_cpu_inc(*n->pcpu_success);
}
#endif
@@ -139,37 +149,39 @@ next_knode:
if (!data)
goto out;
if ((*data ^ key->val) & key->mask) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#ifdef CONFIG_CLS_U32_PERF
- n->pf->kcnts[j] += 1;
+ __this_cpu_inc(n->pf->kcnts[j]);
j++;
#endif
}
- if (n->ht_down == NULL) {
+
+ ht = rcu_dereference_bh(n->ht_down);
+ if (!ht) {
check_terminal:
if (n->sel.flags & TC_U32_TERMINAL) {
*res = n->res;
#ifdef CONFIG_NET_CLS_IND
if (!tcf_match_indev(skb, n->ifindex)) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
#endif
#ifdef CONFIG_CLS_U32_PERF
- n->pf->rhit += 1;
+ __this_cpu_inc(n->pf->rhit);
#endif
r = tcf_exts_exec(skb, &n->exts, res);
if (r < 0) {
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
return r;
}
- n = n->next;
+ n = rcu_dereference_bh(n->next);
goto next_knode;
}
@@ -180,7 +192,7 @@ check_terminal:
stack[sdepth].off = off;
sdepth++;
- ht = n->ht_down;
+ ht = rcu_dereference_bh(n->ht_down);
sel = 0;
if (ht->divisor) {
__be32 *data, hdata;
@@ -222,7 +234,7 @@ check_terminal:
/* POP */
if (sdepth--) {
n = stack[sdepth].knode;
- ht = n->ht_up;
+ ht = rcu_dereference_bh(n->ht_up);
off = stack[sdepth].off;
goto check_terminal;
}
@@ -239,7 +251,9 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
struct tc_u_hnode *ht;
- for (ht = tp_c->hlist; ht; ht = ht->next)
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
if (ht->handle == handle)
break;
@@ -256,7 +270,9 @@ u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
if (sel > ht->divisor)
goto out;
- for (n = ht->ht[sel]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[sel]);
+ n;
+ n = rtnl_dereference(n->next))
if (n->handle == handle)
break;
out:
@@ -270,7 +286,7 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
struct tc_u_common *tp_c = tp->data;
if (TC_U32_HTID(handle) == TC_U32_ROOT)
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
else
ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
@@ -291,6 +307,9 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
{
int i = 0x800;
+ /* hgenerator only used inside rtnl lock it is safe to increment
+ * without read _copy_ update semantics
+ */
do {
if (++tp_c->hgenerator == 0x7FF)
tp_c->hgenerator = 1;
@@ -326,41 +345,78 @@ static int u32_init(struct tcf_proto *tp)
}
tp_c->refcnt++;
- root_ht->next = tp_c->hlist;
- tp_c->hlist = root_ht;
+ RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, root_ht);
root_ht->tp_c = tp_c;
- tp->root = root_ht;
+ rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+static int u32_destroy_key(struct tcf_proto *tp,
+ struct tc_u_knode *n,
+ bool free_pf)
{
- tcf_unbind_filter(tp, &n->res);
- tcf_exts_destroy(tp, &n->exts);
+ tcf_exts_destroy(&n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ if (free_pf)
+ free_percpu(n->pf);
+#endif
+#ifdef CONFIG_CLS_U32_MARK
+ if (free_pf)
+ free_percpu(n->pcpu_success);
#endif
kfree(n);
return 0;
}
+/* u32_delete_key_rcu should be called when free'ing a copied
+ * version of a tc_u_knode obtained from u32_init_knode(). When
+ * copies are obtained from u32_init_knode() the statistics are
+ * shared between the old and new copies to allow readers to
+ * continue to update the statistics during the copy. To support
+ * this the u32_delete_key_rcu variant does not free the percpu
+ * statistics.
+ */
+static void u32_delete_key_rcu(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, false);
+}
+
+/* u32_delete_key_freepf_rcu is the rcu callback variant
+ * that free's the entire structure including the statistics
+ * percpu variables. Only use this if the key is not a copy
+ * returned by u32_init_knode(). See u32_delete_key_rcu()
+ * for the variant that should be used with keys return from
+ * u32_init_knode()
+ */
+static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
+{
+ struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
+
+ u32_destroy_key(key->tp, key, true);
+}
+
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
- struct tc_u_knode **kp;
- struct tc_u_hnode *ht = key->ht_up;
+ struct tc_u_knode __rcu **kp;
+ struct tc_u_knode *pkp;
+ struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
if (ht) {
- for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
- if (*kp == key) {
- tcf_tree_lock(tp);
- *kp = key->next;
- tcf_tree_unlock(tp);
-
- u32_destroy_key(tp, key);
+ kp = &ht->ht[TC_U32_HASH(key->handle)];
+ for (pkp = rtnl_dereference(*kp); pkp;
+ kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
+ if (pkp == key) {
+ RCU_INIT_POINTER(*kp, key->next);
+
+ tcf_unbind_filter(tp, &key->res);
+ call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
}
}
@@ -375,10 +431,11 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
unsigned int h;
for (h = 0; h <= ht->divisor; h++) {
- while ((n = ht->ht[h]) != NULL) {
- ht->ht[h] = n->next;
-
- u32_destroy_key(tp, n);
+ while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
+ RCU_INIT_POINTER(ht->ht[h],
+ rtnl_dereference(n->next));
+ tcf_unbind_filter(tp, &n->res);
+ call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
}
}
}
@@ -386,28 +443,31 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode **hn;
+ struct tc_u_hnode __rcu **hn;
+ struct tc_u_hnode *phn;
WARN_ON(ht->refcnt);
u32_clear_hnode(tp, ht);
- for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
- if (*hn == ht) {
- *hn = ht->next;
- kfree(ht);
+ hn = &tp_c->hlist;
+ for (phn = rtnl_dereference(*hn);
+ phn;
+ hn = &phn->next, phn = rtnl_dereference(*hn)) {
+ if (phn == ht) {
+ RCU_INIT_POINTER(*hn, ht->next);
+ kfree_rcu(ht, rcu);
return 0;
}
}
- WARN_ON(1);
return -ENOENT;
}
static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode *root_ht = tp->root;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
@@ -419,17 +479,16 @@ static void u32_destroy(struct tcf_proto *tp)
tp->q->u32_node = NULL;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
u32_clear_hnode(tp, ht);
}
- while ((ht = tp_c->hlist) != NULL) {
- tp_c->hlist = ht->next;
-
- WARN_ON(ht->refcnt != 0);
-
- kfree(ht);
+ while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
+ RCU_INIT_POINTER(tp_c->hlist, ht->next);
+ kfree_rcu(ht, rcu);
}
kfree(tp_c);
@@ -441,6 +500,7 @@ static void u32_destroy(struct tcf_proto *tp)
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+ struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
if (ht == NULL)
return 0;
@@ -448,7 +508,7 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
if (TC_U32_KEY(ht->handle))
return u32_delete_key(tp, (struct tc_u_knode *)ht);
- if (tp->root == ht)
+ if (root_ht == ht)
return -EINVAL;
if (ht->refcnt == 1) {
@@ -471,7 +531,9 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
if (!bitmap)
return handle | 0xFFF;
- for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+ for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
+ n;
+ n = rtnl_dereference(n->next))
set_bit(TC_U32_NODE(n->handle), bitmap);
i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
@@ -521,10 +583,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
ht_down->refcnt++;
}
- tcf_tree_lock(tp);
- ht_old = n->ht_down;
- n->ht_down = ht_down;
- tcf_tree_unlock(tp);
+ ht_old = rtnl_dereference(n->ht_down);
+ rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
ht_old->refcnt--;
@@ -547,10 +607,86 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
return 0;
errout:
- tcf_exts_destroy(tp, &e);
+ tcf_exts_destroy(&e);
return err;
}
+static void u32_replace_knode(struct tcf_proto *tp,
+ struct tc_u_common *tp_c,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+ struct tc_u_hnode *ht;
+
+ if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
+ ht = rtnl_dereference(tp->root);
+ else
+ ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
+
+ ins = &ht->ht[TC_U32_HASH(n->handle)];
+
+ /* The node must always exist for it to be replaced if this is not the
+ * case then something went very wrong elsewhere.
+ */
+ for (pins = rtnl_dereference(*ins); ;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (pins->handle == n->handle)
+ break;
+
+ RCU_INIT_POINTER(n->next, pins->next);
+ rcu_assign_pointer(*ins, n);
+}
+
+static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+ struct tc_u_knode *n)
+{
+ struct tc_u_knode *new;
+ struct tc_u32_sel *s = &n->sel;
+
+ new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
+ GFP_KERNEL);
+
+ if (!new)
+ return NULL;
+
+ RCU_INIT_POINTER(new->next, n->next);
+ new->handle = n->handle;
+ RCU_INIT_POINTER(new->ht_up, n->ht_up);
+
+#ifdef CONFIG_NET_CLS_IND
+ new->ifindex = n->ifindex;
+#endif
+ new->fshift = n->fshift;
+ new->res = n->res;
+ RCU_INIT_POINTER(new->ht_down, n->ht_down);
+
+ /* bump reference count as long as we hold pointer to structure */
+ if (new->ht_down)
+ new->ht_down->refcnt++;
+
+#ifdef CONFIG_CLS_U32_PERF
+ /* Statistics may be incremented by readers during update
+ * so we must keep them in tact. When the node is later destroyed
+ * a special destroy call must be made to not free the pf memory.
+ */
+ new->pf = n->pf;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+ new->val = n->val;
+ new->mask = n->mask;
+ /* Similarly success statistics must be moved as pointers */
+ new->pcpu_success = n->pcpu_success;
+#endif
+ new->tp = tp;
+ memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+
+ tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+
+ return new;
+}
+
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
@@ -564,6 +700,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *tb[TCA_U32_MAX + 1];
u32 htid;
int err;
+#ifdef CONFIG_CLS_U32_PERF
+ size_t size;
+#endif
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -574,11 +713,28 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n = (struct tc_u_knode *)*arg;
if (n) {
+ struct tc_u_knode *new;
+
if (TC_U32_KEY(n->handle) == 0)
return -EINVAL;
- return u32_set_parms(net, tp, base, n->ht_up, n, tb,
- tca[TCA_RATE], ovr);
+ new = u32_init_knode(tp, n);
+ if (!new)
+ return -ENOMEM;
+
+ err = u32_set_parms(net, tp, base,
+ rtnl_dereference(n->ht_up), new, tb,
+ tca[TCA_RATE], ovr);
+
+ if (err) {
+ u32_destroy_key(tp, new, false);
+ return err;
+ }
+
+ u32_replace_knode(tp, tp_c, new);
+ tcf_unbind_filter(tp, &n->res);
+ call_rcu(&n->rcu, u32_delete_key_rcu);
+ return 0;
}
if (tb[TCA_U32_DIVISOR]) {
@@ -601,8 +757,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
- ht->next = tp_c->hlist;
- tp_c->hlist = ht;
+ RCU_INIT_POINTER(ht->next, tp_c->hlist);
+ rcu_assign_pointer(tp_c->hlist, ht);
*arg = (unsigned long)ht;
return 0;
}
@@ -610,7 +766,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_U32_HASH]) {
htid = nla_get_u32(tb[TCA_U32_HASH]);
if (TC_U32_HTID(htid) == TC_U32_ROOT) {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
} else {
ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
@@ -618,7 +774,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
} else {
- ht = tp->root;
+ ht = rtnl_dereference(tp->root);
htid = ht->handle;
}
@@ -642,46 +798,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
#ifdef CONFIG_CLS_U32_PERF
- n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
- if (n->pf == NULL) {
+ size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
+ n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
+ if (!n->pf) {
kfree(n);
return -ENOBUFS;
}
#endif
memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
- n->ht_up = ht;
+ RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ n->tp = tp;
#ifdef CONFIG_CLS_U32_MARK
+ n->pcpu_success = alloc_percpu(u32);
+ if (!n->pcpu_success) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
if (tb[TCA_U32_MARK]) {
struct tc_u32_mark *mark;
mark = nla_data(tb[TCA_U32_MARK]);
- memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
- n->mark.success = 0;
+ n->val = mark->val;
+ n->mask = mark->mask;
}
#endif
err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
if (err == 0) {
- struct tc_u_knode **ins;
- for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
- if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+
+ ins = &ht->ht[TC_U32_HASH(handle)];
+ for (pins = rtnl_dereference(*ins); pins;
+ ins = &pins->next, pins = rtnl_dereference(*ins))
+ if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
break;
- n->next = *ins;
- tcf_tree_lock(tp);
- *ins = n;
- tcf_tree_unlock(tp);
+ RCU_INIT_POINTER(n->next, pins);
+ rcu_assign_pointer(*ins, n);
*arg = (unsigned long)n;
return 0;
}
+
+#ifdef CONFIG_CLS_U32_MARK
+ free_percpu(n->pcpu_success);
+errout:
+#endif
+
#ifdef CONFIG_CLS_U32_PERF
- kfree(n->pf);
+ free_percpu(n->pf);
#endif
kfree(n);
return err;
@@ -697,7 +869,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->stop)
return;
- for (ht = tp_c->hlist; ht; ht = ht->next) {
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next)) {
if (ht->prio != tp->prio)
continue;
if (arg->count >= arg->skip) {
@@ -708,7 +882,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
arg->count++;
for (h = 0; h <= ht->divisor; h++) {
- for (n = ht->ht[h]; n; n = n->next) {
+ for (n = rtnl_dereference(ht->ht[h]);
+ n;
+ n = rtnl_dereference(n->next)) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -727,6 +903,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode *)fh;
+ struct tc_u_hnode *ht_up, *ht_down;
struct nlattr *nest;
if (n == NULL)
@@ -745,11 +922,18 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
goto nla_put_failure;
} else {
+#ifdef CONFIG_CLS_U32_PERF
+ struct tc_u32_pcnt *gpf;
+ int cpu;
+#endif
+
if (nla_put(skb, TCA_U32_SEL,
sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
&n->sel))
goto nla_put_failure;
- if (n->ht_up) {
+
+ ht_up = rtnl_dereference(n->ht_up);
+ if (ht_up) {
u32 htid = n->handle & 0xFFFFF000;
if (nla_put_u32(skb, TCA_U32_HASH, htid))
goto nla_put_failure;
@@ -757,14 +941,28 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (n->res.classid &&
nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
goto nla_put_failure;
- if (n->ht_down &&
- nla_put_u32(skb, TCA_U32_LINK, n->ht_down->handle))
+
+ ht_down = rtnl_dereference(n->ht_down);
+ if (ht_down &&
+ nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
goto nla_put_failure;
#ifdef CONFIG_CLS_U32_MARK
- if ((n->mark.val || n->mark.mask) &&
- nla_put(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark))
- goto nla_put_failure;
+ if ((n->val || n->mask)) {
+ struct tc_u32_mark mark = {.val = n->val,
+ .mask = n->mask,
+ .success = 0};
+ int cpum;
+
+ for_each_possible_cpu(cpum) {
+ __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
+
+ mark.success += cnt;
+ }
+
+ if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
+ goto nla_put_failure;
+ }
#endif
if (tcf_exts_dump(skb, &n->exts) < 0)
@@ -779,10 +977,29 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
}
#endif
#ifdef CONFIG_CLS_U32_PERF
+ gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
+ n->sel.nkeys * sizeof(u64),
+ GFP_KERNEL);
+ if (!gpf)
+ goto nla_put_failure;
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
+
+ gpf->rcnt += pf->rcnt;
+ gpf->rhit += pf->rhit;
+ for (i = 0; i < n->sel.nkeys; i++)
+ gpf->kcnts[i] += pf->kcnts[i];
+ }
+
if (nla_put(skb, TCA_U32_PCNT,
sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
- n->pf))
+ gpf)) {
+ kfree(gpf);
goto nla_put_failure;
+ }
+ kfree(gpf);
#endif
}
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c
index bfd34e4c1afc..ddd883ca55b2 100644
--- a/net/sched/em_canid.c
+++ b/net/sched/em_canid.c
@@ -120,12 +120,11 @@ static int em_canid_match(struct sk_buff *skb, struct tcf_ematch *m,
return match;
}
-static int em_canid_change(struct tcf_proto *tp, void *data, int len,
+static int em_canid_change(struct net *net, void *data, int len,
struct tcf_ematch *m)
{
struct can_filter *conf = data; /* Array with rules */
struct canid_match *cm;
- struct canid_match *cm_old = (struct canid_match *)m->data;
int i;
if (!len)
@@ -181,16 +180,10 @@ static int em_canid_change(struct tcf_proto *tp, void *data, int len,
m->datalen = sizeof(struct canid_match) + len;
m->data = (unsigned long)cm;
-
- if (cm_old != NULL) {
- pr_err("canid: Configuring an existing ematch!\n");
- kfree(cm_old);
- }
-
return 0;
}
-static void em_canid_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+static void em_canid_destroy(struct tcf_ematch *m)
{
struct canid_match *cm = em_canid_priv(m);
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 527aeb7a3ff0..5b4a4efe468c 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -19,12 +19,11 @@
#include <net/ip.h>
#include <net/pkt_cls.h>
-static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
+static int em_ipset_change(struct net *net, void *data, int data_len,
struct tcf_ematch *em)
{
struct xt_set_info *set = data;
ip_set_id_t index;
- struct net *net = dev_net(qdisc_dev(tp->q));
if (data_len != sizeof(*set))
return -EINVAL;
@@ -42,11 +41,11 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
return -ENOMEM;
}
-static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
+static void em_ipset_destroy(struct tcf_ematch *em)
{
const struct xt_set_info *set = (const void *) em->data;
if (set) {
- ip_set_nfnl_put(dev_net(qdisc_dev(p->q)), set->index);
+ ip_set_nfnl_put(em->net, set->index);
kfree((void *) em->data);
}
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 9b8c0b0e60d7..c8f8c399b99a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -856,7 +856,7 @@ static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
[TCA_EM_META_HDR] = { .len = sizeof(struct tcf_meta_hdr) },
};
-static int em_meta_change(struct tcf_proto *tp, void *data, int len,
+static int em_meta_change(struct net *net, void *data, int len,
struct tcf_ematch *m)
{
int err;
@@ -908,7 +908,7 @@ errout:
return err;
}
-static void em_meta_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+static void em_meta_destroy(struct tcf_ematch *m)
{
if (m)
meta_delete((struct meta_match *) m->data);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index a3bed07a008b..df3110d69585 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -23,7 +23,7 @@ struct nbyte_data {
char pattern[0];
};
-static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len,
+static int em_nbyte_change(struct net *net, void *data, int data_len,
struct tcf_ematch *em)
{
struct tcf_em_nbyte *nbyte = data;
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 15d353d2e4be..f03c3de16c27 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -45,7 +45,7 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX;
}
-static int em_text_change(struct tcf_proto *tp, void *data, int len,
+static int em_text_change(struct net *net, void *data, int len,
struct tcf_ematch *m)
{
struct text_match *tm;
@@ -100,7 +100,7 @@ retry:
return 0;
}
-static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+static void em_text_destroy(struct tcf_ematch *m)
{
if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
textsearch_destroy(EM_TEXT_PRIV(m)->config);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 3a633debb6df..6742200b1307 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -178,6 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
struct tcf_ematch_hdr *em_hdr = nla_data(nla);
int data_len = nla_len(nla) - sizeof(*em_hdr);
void *data = (void *) em_hdr + sizeof(*em_hdr);
+ struct net *net = dev_net(qdisc_dev(tp->q));
if (!TCF_EM_REL_VALID(em_hdr->flags))
goto errout;
@@ -240,7 +241,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
goto errout;
if (em->ops->change) {
- err = em->ops->change(tp, data, data_len, em);
+ err = em->ops->change(net, data, data_len, em);
if (err < 0)
goto errout;
} else if (data_len > 0) {
@@ -271,6 +272,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
em->matchid = em_hdr->matchid;
em->flags = em_hdr->flags;
em->datalen = data_len;
+ em->net = net;
err = 0;
errout:
@@ -378,7 +380,7 @@ errout:
return err;
errout_abort:
- tcf_em_tree_destroy(tp, tree);
+ tcf_em_tree_destroy(tree);
return err;
}
EXPORT_SYMBOL(tcf_em_tree_validate);
@@ -393,7 +395,7 @@ EXPORT_SYMBOL(tcf_em_tree_validate);
* tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
* the ematch tree is not in use before calling this function.
*/
-void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
+void tcf_em_tree_destroy(struct tcf_ematch_tree *tree)
{
int i;
@@ -405,7 +407,7 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
if (em->ops) {
if (em->ops->destroy)
- em->ops->destroy(tp, em);
+ em->ops->destroy(em);
else if (!tcf_em_is_simple(em))
kfree((void *) em->data);
module_put(em->ops->owner);
@@ -526,9 +528,12 @@ pop_stack:
match_idx = stack[--stackp];
cur_match = tcf_em_get_match(tree, match_idx);
- if (tcf_em_early_end(cur_match, res))
+ if (tcf_em_is_inverted(cur_match))
+ res = !res;
+
+ if (tcf_em_early_end(cur_match, res)) {
goto pop_stack;
- else {
+ } else {
match_idx++;
goto proceed;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 58bed7599db7..2cf61b3e633c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -578,31 +578,34 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
timer);
+ rcu_read_lock();
qdisc_unthrottled(wd->qdisc);
__netif_schedule(qdisc_root(wd->qdisc));
+ rcu_read_unlock();
return HRTIMER_NORESTART;
}
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
- hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
wd->timer.function = qdisc_watchdog;
wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
+void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle)
{
if (test_bit(__QDISC_STATE_DEACTIVATED,
&qdisc_root_sleeping(wd->qdisc)->state))
return;
- qdisc_throttled(wd->qdisc);
+ if (throttle)
+ qdisc_throttled(wd->qdisc);
hrtimer_start(&wd->timer,
ns_to_ktime(expires),
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
@@ -763,7 +766,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
- sch->qstats.drops += drops;
+ __qdisc_qstats_drop(sch, drops);
}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
@@ -942,6 +945,17 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
sch->handle = handle;
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+ if (qdisc_is_percpu_stats(sch)) {
+ sch->cpu_bstats =
+ alloc_percpu(struct gnet_stats_basic_cpu);
+ if (!sch->cpu_bstats)
+ goto err_out4;
+
+ sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!sch->cpu_qstats)
+ goto err_out4;
+ }
+
if (tca[TCA_STAB]) {
stab = qdisc_get_stab(tca[TCA_STAB]);
if (IS_ERR(stab)) {
@@ -964,8 +978,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
else
root_lock = qdisc_lock(sch);
- err = gen_new_estimator(&sch->bstats, &sch->rate_est,
- root_lock, tca[TCA_RATE]);
+ err = gen_new_estimator(&sch->bstats,
+ sch->cpu_bstats,
+ &sch->rate_est,
+ root_lock,
+ tca[TCA_RATE]);
if (err)
goto err_out4;
}
@@ -984,6 +1001,8 @@ err_out:
return NULL;
err_out4:
+ free_percpu(sch->cpu_bstats);
+ free_percpu(sch->cpu_qstats);
/*
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
@@ -1022,9 +1041,11 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
because change can't be undone. */
if (sch->flags & TCQ_F_MQROOT)
goto out;
- gen_replace_estimator(&sch->bstats, &sch->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE]);
+ gen_replace_estimator(&sch->bstats,
+ sch->cpu_bstats,
+ &sch->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
}
out:
return 0;
@@ -1299,11 +1320,14 @@ graft:
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
u32 portid, u32 seq, u16 flags, int event)
{
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+ struct gnet_stats_queue __percpu *cpu_qstats = NULL;
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
struct qdisc_size_table *stab;
+ __u32 qlen;
cond_resched();
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
@@ -1321,7 +1345,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto nla_put_failure;
- q->qstats.qlen = q->q.qlen;
+ qlen = q->q.qlen;
stab = rtnl_dereference(q->stab);
if (stab && qdisc_dump_stab(skb, stab) < 0)
@@ -1334,9 +1358,14 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
goto nla_put_failure;
- if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
+ if (qdisc_is_percpu_stats(q)) {
+ cpu_bstats = q->cpu_bstats;
+ cpu_qstats = q->cpu_qstats;
+ }
+
+ if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
- gnet_stats_copy_queue(&d, &q->qstats) < 0)
+ gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
goto nla_put_failure;
if (gnet_stats_finish_copy(&d) < 0)
@@ -1781,7 +1810,7 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
__be16 protocol = skb->protocol;
int err;
- for (; tp; tp = tp->next) {
+ for (; tp; tp = rcu_dereference_bh(tp->next)) {
if (tp->protocol != protocol &&
tp->protocol != htons(ETH_P_ALL))
continue;
@@ -1833,15 +1862,15 @@ void tcf_destroy(struct tcf_proto *tp)
{
tp->ops->destroy(tp);
module_put(tp->ops->owner);
- kfree(tp);
+ kfree_rcu(tp, rcu);
}
-void tcf_destroy_chain(struct tcf_proto **fl)
+void tcf_destroy_chain(struct tcf_proto __rcu **fl)
{
struct tcf_proto *tp;
- while ((tp = *fl) != NULL) {
- *fl = tp->next;
+ while ((tp = rtnl_dereference(*fl)) != NULL) {
+ RCU_INIT_POINTER(*fl, tp->next);
tcf_destroy(tp);
}
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 8449b337f9e3..e3e2cc5fd068 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -41,7 +41,7 @@
struct atm_flow_data {
struct Qdisc *q; /* FIFO, TBF, etc. */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
void (*old_pop)(struct atm_vcc *vcc,
struct sk_buff *skb); /* chaining */
@@ -273,7 +273,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
error = -ENOBUFS;
goto err_out;
}
- flow->filter_list = NULL;
+ RCU_INIT_POINTER(flow->filter_list, NULL);
flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!flow->q)
flow->q = &noop_qdisc;
@@ -311,7 +311,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
if (list_empty(&flow->list))
return -EINVAL;
- if (flow->filter_list || flow == &p->link)
+ if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
return -EBUSY;
/*
* Reference count must be 2: one for "keepalive" (set at class
@@ -345,7 +345,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
@@ -369,11 +370,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
flow = NULL;
if (TC_H_MAJ(skb->priority) != sch->handle ||
!(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+ struct tcf_proto *fl;
+
list_for_each_entry(flow, &p->flows, list) {
- if (flow->filter_list) {
- result = tc_classify_compat(skb,
- flow->filter_list,
- &res);
+ fl = rcu_dereference_bh(flow->filter_list);
+ if (fl) {
+ result = tc_classify_compat(skb, fl, &res);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
@@ -415,7 +417,7 @@ done:
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
if (flow)
flow->qstats.drops++;
}
@@ -544,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- p->link.filter_list = NULL;
+ RCU_INIT_POINTER(p->link.filter_list, NULL);
p->link.vcc = NULL;
p->link.sock = NULL;
p->link.classid = sch->handle;
@@ -635,10 +637,8 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
- flow->qstats.qlen = flow->q->q.qlen;
-
- if (gnet_stats_copy_basic(d, &flow->bstats) < 0 ||
- gnet_stats_copy_queue(d, &flow->qstats) < 0)
+ if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index ead526467cca..beeb75f80fdb 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -133,7 +133,7 @@ struct cbq_class {
struct gnet_stats_rate_est64 rate_est;
struct tc_cbq_xstats xstats;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
int refcnt;
int filters;
@@ -159,7 +159,6 @@ struct cbq_sched_data {
struct cbq_class *tx_borrowed;
int tx_len;
psched_time_t now; /* Cached timestamp */
- psched_time_t now_rt; /* Cached real time */
unsigned int pmask;
struct hrtimer delay_timer;
@@ -222,6 +221,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct cbq_class **defmap;
struct cbq_class *cl = NULL;
u32 prio = skb->priority;
+ struct tcf_proto *fl;
struct tcf_result res;
/*
@@ -236,11 +236,12 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int result = 0;
defmap = head->defaults;
+ fl = rcu_dereference_bh(head->filter_list);
/*
* Step 2+n. Apply classifier.
*/
- if (!head->filter_list ||
- (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
+ result = tc_classify_compat(skb, fl, &res);
+ if (!fl || result < 0)
goto fallback;
cl = (void *)res.class;
@@ -353,12 +354,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
int toplevel = q->toplevel;
if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
- psched_time_t now;
- psched_tdiff_t incr;
-
- now = psched_get_time();
- incr = now - q->now_rt;
- now = q->now + incr;
+ psched_time_t now = psched_get_time();
do {
if (cl->undertime < now) {
@@ -381,7 +377,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#endif
if (cl == NULL) {
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
}
@@ -399,7 +395,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
if (net_xmit_drop_count(ret)) {
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
cbq_mark_toplevel(q, cl);
cl->qstats.drops++;
}
@@ -621,7 +617,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
time = ktime_set(0, 0);
time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
- hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
+ hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
}
qdisc_unthrottled(sch);
@@ -654,11 +650,11 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
return 0;
}
if (net_xmit_drop_count(ret))
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
return 0;
}
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
return -1;
}
#endif
@@ -700,8 +696,13 @@ cbq_update(struct cbq_sched_data *q)
struct cbq_class *this = q->tx_class;
struct cbq_class *cl = this;
int len = q->tx_len;
+ psched_time_t now;
q->tx_class = NULL;
+ /* Time integrator. We calculate EOS time
+ * by adding expected packet transmission time.
+ */
+ now = q->now + L2T(&q->link, len);
for ( ; cl; cl = cl->share) {
long avgidle = cl->avgidle;
@@ -717,7 +718,7 @@ cbq_update(struct cbq_sched_data *q)
* idle = (now - last) - last_pktlen/rate
*/
- idle = q->now - cl->last;
+ idle = now - cl->last;
if ((unsigned long)idle > 128*1024*1024) {
avgidle = cl->maxidle;
} else {
@@ -761,7 +762,7 @@ cbq_update(struct cbq_sched_data *q)
idle -= L2T(&q->link, len);
idle += L2T(cl, len);
- cl->undertime = q->now + idle;
+ cl->undertime = now + idle;
} else {
/* Underlimit */
@@ -771,7 +772,8 @@ cbq_update(struct cbq_sched_data *q)
else
cl->avgidle = avgidle;
}
- cl->last = q->now;
+ if ((s64)(now - cl->last) > 0)
+ cl->last = now;
}
cbq_update_toplevel(q, this, q->tx_borrowed);
@@ -943,31 +945,13 @@ cbq_dequeue(struct Qdisc *sch)
struct sk_buff *skb;
struct cbq_sched_data *q = qdisc_priv(sch);
psched_time_t now;
- psched_tdiff_t incr;
now = psched_get_time();
- incr = now - q->now_rt;
-
- if (q->tx_class) {
- psched_tdiff_t incr2;
- /* Time integrator. We calculate EOS time
- * by adding expected packet transmission time.
- * If real time is greater, we warp artificial clock,
- * so that:
- *
- * cbq_time = max(real_time, work);
- */
- incr2 = L2T(&q->link, q->tx_len);
- q->now += incr2;
+
+ if (q->tx_class)
cbq_update(q);
- if ((incr -= incr2) < 0)
- incr = 0;
- q->now += incr;
- } else {
- if (now > q->now)
- q->now = now;
- }
- q->now_rt = now;
+
+ q->now = now;
for (;;) {
q->wd_expires = 0;
@@ -1011,7 +995,7 @@ cbq_dequeue(struct Qdisc *sch)
*/
if (sch->q.qlen) {
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (q->wd_expires)
qdisc_watchdog_schedule(&q->watchdog,
now + q->wd_expires);
@@ -1223,7 +1207,6 @@ cbq_reset(struct Qdisc *sch)
hrtimer_cancel(&q->delay_timer);
q->toplevel = TC_CBQ_MAXLEVEL;
q->now = psched_get_time();
- q->now_rt = q->now;
for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
q->active[prio] = NULL;
@@ -1403,11 +1386,10 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
q->link.minidle = -0x7FFFFFFF;
qdisc_watchdog_init(&q->watchdog, sch);
- hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
q->delay_timer.function = cbq_undelay;
q->toplevel = TC_CBQ_MAXLEVEL;
q->now = psched_get_time();
- q->now_rt = q->now;
cbq_link_class(&q->link);
@@ -1612,16 +1594,15 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
- cl->qstats.qlen = cl->q->q.qlen;
cl->xstats.avgidle = cl->avgidle;
cl->xstats.undertime = 0;
if (cl->undertime != PSCHED_PASTPERFECT)
cl->xstats.undertime = cl->undertime - q->now;
- if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, &cl->qstats) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
@@ -1777,7 +1758,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
}
if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err) {
@@ -1870,7 +1852,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
goto failure;
if (tca[TCA_RATE]) {
- err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err) {
@@ -1974,7 +1956,8 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
return 0;
}
-static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
+static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
+ unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ed30e436128b..c009eb9045ce 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -57,7 +57,7 @@ struct choke_sched_data {
/* Variables */
struct red_vars vars;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct {
u32 prob_drop; /* Early probability drops */
u32 prob_mark; /* Early probability marks */
@@ -127,16 +127,22 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
if (idx == q->tail)
choke_zap_tail_holes(q);
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
qdisc_tree_decrease_qlen(sch, 1);
--sch->q.qlen;
}
+/* private part of skb->cb[] that a qdisc is allowed to use
+ * is limited to QDISC_CB_PRIV_LEN bytes.
+ * As a flow key might be too large, we store a part of it only.
+ */
+#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
+
struct choke_skb_cb {
u16 classid;
u8 keys_valid;
- struct flow_keys keys;
+ u8 keys[QDISC_CB_PRIV_LEN - 3];
};
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -163,22 +169,26 @@ static u16 choke_get_classid(const struct sk_buff *skb)
static bool choke_match_flow(struct sk_buff *skb1,
struct sk_buff *skb2)
{
+ struct flow_keys temp;
+
if (skb1->protocol != skb2->protocol)
return false;
if (!choke_skb_cb(skb1)->keys_valid) {
choke_skb_cb(skb1)->keys_valid = 1;
- skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
+ skb_flow_dissect(skb1, &temp);
+ memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
}
if (!choke_skb_cb(skb2)->keys_valid) {
choke_skb_cb(skb2)->keys_valid = 1;
- skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
+ skb_flow_dissect(skb2, &temp);
+ memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
}
return !memcmp(&choke_skb_cb(skb1)->keys,
&choke_skb_cb(skb2)->keys,
- sizeof(struct flow_keys));
+ CHOKE_K_LEN);
}
/*
@@ -193,9 +203,11 @@ static bool choke_classify(struct sk_buff *skb,
{
struct choke_sched_data *q = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -249,7 +261,7 @@ static bool choke_match_random(const struct choke_sched_data *q,
return false;
oskb = choke_peek_random(q, pidx);
- if (q->filter_list)
+ if (rcu_access_pointer(q->filter_list))
return choke_get_classid(nskb) == choke_get_classid(oskb);
return choke_match_flow(oskb, nskb);
@@ -257,11 +269,11 @@ static bool choke_match_random(const struct choke_sched_data *q,
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
+ int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
const struct red_parms *p = &q->parms;
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- if (q->filter_list) {
+ if (rcu_access_pointer(q->filter_list)) {
/* If using external classifiers, get result and record it. */
if (!choke_classify(skb, sch, &ret))
goto other_drop; /* Packet was eaten by filter */
@@ -290,7 +302,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (q->vars.qavg > p->qth_max) {
q->vars.qcount = -1;
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (use_harddrop(q) || !use_ecn(q) ||
!INET_ECN_set_ce(skb)) {
q->stats.forced_drop++;
@@ -303,7 +315,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
q->vars.qcount = 0;
q->vars.qR = red_random(p);
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
q->stats.prob_drop++;
goto congestion_drop;
@@ -320,7 +332,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
q->tab[q->tail] = skb;
q->tail = (q->tail + 1) & q->tab_mask;
++sch->q.qlen;
- sch->qstats.backlog += qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_inc(sch, skb);
return NET_XMIT_SUCCESS;
}
@@ -333,7 +345,7 @@ congestion_drop:
other_drop:
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
}
@@ -353,7 +365,7 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch)
q->tab[q->head] = NULL;
choke_zap_head_holes(q);
--sch->q.qlen;
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
return skb;
@@ -448,7 +460,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
ntab[tail++] = skb;
continue;
}
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
qdisc_drop(skb, sch);
}
@@ -554,7 +566,8 @@ static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
return 0;
}
-static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct choke_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 2f9ab17db85a..de28f8e968e8 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -149,7 +149,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 7bbbfe112192..338706092c27 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -35,7 +35,7 @@ struct drr_class {
struct drr_sched {
struct list_head active;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc_class_hash clhash;
};
@@ -88,7 +88,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl != NULL) {
if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err)
@@ -116,7 +117,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->qdisc = &noop_qdisc;
if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err) {
@@ -184,7 +185,8 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg)
drr_destroy_class(sch, cl);
}
-static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch,
+ unsigned long cl)
{
struct drr_sched *q = qdisc_priv(sch);
@@ -273,17 +275,16 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct gnet_dump *d)
{
struct drr_class *cl = (struct drr_class *)arg;
+ __u32 qlen = cl->qdisc->q.qlen;
struct tc_drr_stats xstats;
memset(&xstats, 0, sizeof(xstats));
- if (cl->qdisc->q.qlen) {
+ if (qlen)
xstats.deficit = cl->deficit;
- cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
- }
- if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -319,6 +320,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
@@ -328,7 +330,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -356,7 +359,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cl = drr_classify(skb, sch, &err);
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return err;
}
@@ -365,7 +368,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
}
return err;
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 49d6ef338b55..227114f27f94 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -37,7 +37,7 @@
struct dsmark_qdisc_data {
struct Qdisc *q;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
u8 *mask; /* "owns" the array */
u8 *value;
u16 indices;
@@ -186,8 +186,8 @@ ignore:
}
}
-static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
return &p->filter_list;
@@ -229,7 +229,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb->tc_index = TC_H_MIN(skb->priority);
else {
struct tcf_result res;
- int result = tc_classify(skb, p->filter_list, &res);
+ struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
+ int result = tc_classify(skb, fl, &res);
pr_debug("result %d class 0x%04x\n", result, res.classid);
@@ -257,7 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
err = qdisc_enqueue(skb, p->q);
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
return err;
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index e15a9eb29087..2e2398cfc694 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* queue full, remove one skb to fulfill the limit */
__qdisc_queue_drop_head(sch, &sch->q);
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
qdisc_enqueue_tail(skb, sch);
return NET_XMIT_CN;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index ba32c2b005d0..cbd7e1fd23b4 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -290,7 +290,7 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
flow->head = skb->next;
skb->next = NULL;
flow->qlen--;
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
}
return skb;
@@ -371,13 +371,12 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
f->qlen++;
if (skb_is_retransmit(skb))
q->stat_tcp_retrans++;
- sch->qstats.backlog += qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
fq_flow_add_tail(&q->new_flows, f);
if (time_after(jiffies, f->age + q->flow_refill_delay))
f->credit = max_t(u32, f->credit, q->quantum);
q->inactive_flows--;
- qdisc_unthrottled(sch);
}
/* Note: this overwrites f->age */
@@ -385,7 +384,6 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(f == &q->internal)) {
q->stat_internal_packets++;
- qdisc_unthrottled(sch);
}
sch->q.qlen++;
@@ -416,7 +414,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 now = ktime_to_ns(ktime_get());
+ u64 now = ktime_get_ns();
struct fq_flow_head *head;
struct sk_buff *skb;
struct fq_flow *f;
@@ -433,7 +431,8 @@ begin:
if (!head->first) {
if (q->time_next_delayed_flow != ~0ULL)
qdisc_watchdog_schedule_ns(&q->watchdog,
- q->time_next_delayed_flow);
+ q->time_next_delayed_flow,
+ false);
return NULL;
}
}
@@ -495,7 +494,6 @@ begin:
}
out:
qdisc_bstats_update(sch, skb);
- qdisc_unthrottled(sch);
return skb;
}
@@ -787,7 +785,7 @@ nla_put_failure:
static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 now = ktime_to_ns(ktime_get());
+ u64 now = ktime_get_ns();
struct tc_fq_qd_stats st = {
.gc_flows = q->stat_gc_flows,
.highprio_packets = q->stat_internal_packets,
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 063b726bf1f8..b9ca32ebc1de 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -52,7 +52,7 @@ struct fq_codel_flow {
}; /* please try to keep this structure <= 64 bytes */
struct fq_codel_sched_data {
- struct tcf_proto *filter_list; /* optional external classifier */
+ struct tcf_proto __rcu *filter_list; /* optional external classifier */
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
@@ -77,13 +77,15 @@ static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
hash = jhash_3words((__force u32)keys.dst,
(__force u32)keys.src ^ keys.ip_proto,
(__force u32)keys.ports, q->perturbation);
- return ((u64)hash * q->flows_cnt) >> 32;
+
+ return reciprocal_scale(hash, q->flows_cnt);
}
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
+ struct tcf_proto *filter;
struct tcf_result res;
int result;
@@ -92,11 +94,12 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->flows_cnt)
return TC_H_MIN(skb->priority);
- if (!q->filter_list)
+ filter = rcu_dereference(q->filter_list);
+ if (!filter)
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, filter, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -161,8 +164,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
q->backlogs[idx] -= len;
kfree_skb(skb);
sch->q.qlen--;
- sch->qstats.drops++;
- sch->qstats.backlog -= len;
+ qdisc_qstats_drop(sch);
+ qdisc_qstats_backlog_dec(sch, skb);
flow->dropped++;
return idx;
}
@@ -177,7 +180,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
idx = fq_codel_classify(skb, sch, &ret);
if (idx == 0) {
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
}
@@ -187,7 +190,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
flow = &q->flows[idx];
flow_queue_add(flow, skb);
q->backlogs[idx] += qdisc_pkt_len(skb);
- sch->qstats.backlog += qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_inc(sch, skb);
if (list_empty(&flow->flowchain)) {
list_add_tail(&flow->flowchain, &q->new_flows);
@@ -495,7 +498,8 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -546,7 +550,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
qs.backlog = q->backlogs[idx];
qs.drops = flow->dropped;
}
- if (gnet_stats_copy_queue(d, &qs) < 0)
+ if (gnet_stats_copy_queue(d, NULL, &qs, 0) < 0)
return -1;
if (idx < q->flows_cnt)
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e1543b03e39d..6efca30894aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -47,7 +47,6 @@ EXPORT_SYMBOL(default_qdisc_ops);
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- skb_dst_force(skb);
q->gso_skb = skb;
q->qstats.requeues++;
q->q.qlen++; /* it's still part of the queue */
@@ -56,24 +55,56 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
return 0;
}
-static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
+static void try_bulk_dequeue_skb(struct Qdisc *q,
+ struct sk_buff *skb,
+ const struct netdev_queue *txq,
+ int *packets)
+{
+ int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
+
+ while (bytelimit > 0) {
+ struct sk_buff *nskb = q->dequeue(q);
+
+ if (!nskb)
+ break;
+
+ bytelimit -= nskb->len; /* covers GSO len */
+ skb->next = nskb;
+ skb = nskb;
+ (*packets)++; /* GSO counts as one pkt */
+ }
+ skb->next = NULL;
+}
+
+/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
+ * A requeued skb (via q->gso_skb) can also be a SKB list.
+ */
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+ int *packets)
{
struct sk_buff *skb = q->gso_skb;
const struct netdev_queue *txq = q->dev_queue;
+ *packets = 1;
+ *validate = true;
if (unlikely(skb)) {
/* check the reason of requeuing without tx lock first */
- txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb));
+ txq = skb_get_tx_queue(txq->dev, skb);
if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
q->q.qlen--;
} else
skb = NULL;
+ /* skb in gso_skb were already validated */
+ *validate = false;
} else {
- if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
+ if (!(q->flags & TCQ_F_ONETXQUEUE) ||
+ !netif_xmit_frozen_or_stopped(txq)) {
skb = q->dequeue(q);
+ if (skb && qdisc_may_bulk(q))
+ try_bulk_dequeue_skb(q, skb, txq, packets);
+ }
}
-
return skb;
}
@@ -90,7 +121,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
* detect it by checking xmit owner and drop the packet when
* deadloop is detected. Return OK to try the next skb.
*/
- kfree_skb(skb);
+ kfree_skb_list(skb);
net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n",
dev_queue->dev->name);
ret = qdisc_qlen(q);
@@ -107,9 +138,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
}
/*
- * Transmit one skb, and handle the return status as required. Holding the
- * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
- * function.
+ * Transmit possibly several skbs, and handle the return status as
+ * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
+ * only one CPU can execute this function.
*
* Returns to the caller:
* 0 - queue is empty or throttled.
@@ -117,19 +148,24 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
*/
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev, struct netdev_queue *txq,
- spinlock_t *root_lock)
+ spinlock_t *root_lock, bool validate)
{
int ret = NETDEV_TX_BUSY;
/* And release qdisc */
spin_unlock(root_lock);
- HARD_TX_LOCK(dev, txq, smp_processor_id());
- if (!netif_xmit_frozen_or_stopped(txq))
- ret = dev_hard_start_xmit(skb, dev, txq);
+ /* Note that we validate skb (GSO, checksum, ...) outside of locks */
+ if (validate)
+ skb = validate_xmit_skb_list(skb, dev);
- HARD_TX_UNLOCK(dev, txq);
+ if (skb) {
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
+ if (!netif_xmit_frozen_or_stopped(txq))
+ skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+ HARD_TX_UNLOCK(dev, txq);
+ }
spin_lock(root_lock);
if (dev_xmit_complete(ret)) {
@@ -156,7 +192,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
/*
* NOTE: Called under qdisc_lock(q) with locally disabled BH.
*
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * __QDISC___STATE_RUNNING guarantees only one CPU can process
* this qdisc at a time. qdisc_lock(q) serializes queue accesses for
* this queue.
*
@@ -172,36 +208,39 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
* >0 - queue is not empty.
*
*/
-static inline int qdisc_restart(struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q, int *packets)
{
struct netdev_queue *txq;
struct net_device *dev;
spinlock_t *root_lock;
struct sk_buff *skb;
+ bool validate;
/* Dequeue packet */
- skb = dequeue_skb(q);
+ skb = dequeue_skb(q, &validate, packets);
if (unlikely(!skb))
return 0;
- WARN_ON_ONCE(skb_dst_is_noref(skb));
+
root_lock = qdisc_lock(q);
dev = qdisc_dev(q);
- txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ txq = skb_get_tx_queue(dev, skb);
- return sch_direct_xmit(skb, q, dev, txq, root_lock);
+ return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
}
void __qdisc_run(struct Qdisc *q)
{
int quota = weight_p;
+ int packets;
- while (qdisc_restart(q)) {
+ while (qdisc_restart(q, &packets)) {
/*
* Ordered by possible occurrence: Postpone processing if
* 1. we've exceeded packet quota
* 2. another process needs the CPU;
*/
- if (--quota <= 0 || need_resched()) {
+ quota -= packets;
+ if (quota <= 0 || need_resched()) {
__netif_schedule(q);
break;
}
@@ -518,7 +557,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
- skb_queue_head_init(band2list(priv, prio));
+ __skb_queue_head_init(band2list(priv, prio));
/* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS;
@@ -616,7 +655,7 @@ void qdisc_reset(struct Qdisc *qdisc)
ops->reset(qdisc);
if (qdisc->gso_skb) {
- kfree_skb(qdisc->gso_skb);
+ kfree_skb_list(qdisc->gso_skb);
qdisc->gso_skb = NULL;
qdisc->q.qlen = 0;
}
@@ -627,6 +666,9 @@ static void qdisc_rcu_free(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
+ if (qdisc_is_percpu_stats(qdisc))
+ free_percpu(qdisc->cpu_bstats);
+
kfree((char *) qdisc - qdisc->padded);
}
@@ -652,7 +694,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
- kfree_skb(qdisc->gso_skb);
+ kfree_skb_list(qdisc->gso_skb);
/*
* gen_estimator est_timer() might access qdisc->q.lock,
* wait a RCU grace period before freeing qdisc.
@@ -778,7 +820,7 @@ static void dev_deactivate_queue(struct net_device *dev,
struct Qdisc *qdisc_default = _qdisc_default;
struct Qdisc *qdisc;
- qdisc = dev_queue->qdisc;
+ qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
spin_lock_bh(qdisc_lock(qdisc));
@@ -871,7 +913,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
{
struct Qdisc *qdisc = _qdisc;
- dev_queue->qdisc = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc, qdisc);
dev_queue->qdisc_sleeping = qdisc;
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 12cbc09157fc..a4ca4517cdc8 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -209,7 +209,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
case RED_PROB_MARK:
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
q->stats.prob_drop++;
goto congestion_drop;
@@ -219,7 +219,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
case RED_HARD_MARK:
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
!INET_ECN_set_ce(skb)) {
q->stats.forced_drop++;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index ec8aeaac1dd7..e6c7416d0332 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,7 +116,7 @@ struct hfsc_class {
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est64 rate_est;
unsigned int level; /* class level in hierarchy */
- struct tcf_proto *filter_list; /* filter list */
+ struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
struct hfsc_sched *sched; /* scheduler data */
@@ -1014,9 +1014,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cur_time = psched_get_time();
if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE]);
+ spinlock_t *lock = qdisc_root_sleeping_lock(sch);
+
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ lock,
+ tca[TCA_RATE]);
if (err)
return err;
}
@@ -1063,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -ENOBUFS;
if (tca[TCA_RATE]) {
- err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err) {
@@ -1161,7 +1164,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
- tcf = q->root.filter_list;
+ tcf = rcu_dereference_bh(q->root.filter_list);
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -1185,7 +1188,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return cl; /* hit leaf class */
/* apply inner filter chain */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
head = cl;
}
@@ -1285,7 +1288,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
cl->filter_cnt--;
}
-static struct tcf_proto **
+static struct tcf_proto __rcu **
hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_sched *q = qdisc_priv(sch);
@@ -1367,16 +1370,15 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct hfsc_class *cl = (struct hfsc_class *)arg;
struct tc_hfsc_stats xstats;
- cl->qstats.qlen = cl->qdisc->q.qlen;
cl->qstats.backlog = cl->qdisc->qstats.backlog;
xstats.level = cl->level;
xstats.period = cl->cl_vtperiod;
xstats.work = cl->cl_total;
xstats.rtwork = cl->cl_cumul;
- if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, &cl->qstats) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -1588,7 +1590,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cl = hfsc_classify(skb, sch, &err);
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return err;
}
@@ -1597,7 +1599,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
}
return err;
}
@@ -1640,7 +1642,7 @@ hfsc_dequeue(struct Qdisc *sch)
*/
cl = vttree_get_minvt(&q->root, cur_time);
if (cl == NULL) {
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
hfsc_schedule_watchdog(sch);
return NULL;
}
@@ -1695,7 +1697,7 @@ hfsc_drop(struct Qdisc *sch)
list_move_tail(&cl->dlist, &q->droplist);
}
cl->qstats.drops++;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
sch->q.qlen--;
return len;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index d85b6812a7d4..15d3aabfe250 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -376,8 +376,8 @@ static unsigned int hhf_drop(struct Qdisc *sch)
struct sk_buff *skb = dequeue_head(bucket);
sch->q.qlen--;
- sch->qstats.drops++;
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_drop(sch);
+ qdisc_qstats_backlog_dec(sch, skb);
kfree_skb(skb);
}
@@ -395,7 +395,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
bucket = &q->buckets[idx];
bucket_add(bucket, skb);
- sch->qstats.backlog += qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_inc(sch, skb);
if (list_empty(&bucket->bucketchain)) {
unsigned int weight;
@@ -457,7 +457,7 @@ begin:
if (bucket->head) {
skb = dequeue_head(bucket);
sch->q.qlen--;
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
}
if (!skb) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9f949abcacef..f1acb0f60dc3 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -103,7 +103,7 @@ struct htb_class {
u32 prio; /* these two are used only by leaves... */
int quantum; /* but stored for parent-to-leaf return */
- struct tcf_proto *filter_list; /* class attached filters */
+ struct tcf_proto __rcu *filter_list; /* class attached filters */
int filter_cnt;
int refcnt; /* usage count of this class */
@@ -153,7 +153,7 @@ struct htb_sched {
int rate2quantum; /* quant = rate / rate2quantum */
/* filters for qdisc itself */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
#define HTB_WARN_TOOMANYEVENTS 0x1
unsigned int warned; /* only one warning */
@@ -223,9 +223,9 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
if (cl->level == 0)
return cl;
/* Start with inner filter chain if a non-leaf class is selected */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
} else {
- tcf = q->filter_list;
+ tcf = rcu_dereference_bh(q->filter_list);
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -251,7 +251,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
return cl; /* we hit leaf; return it */
/* we have got inner class; apply inner filter chain */
- tcf = cl->filter_list;
+ tcf = rcu_dereference_bh(cl->filter_list);
}
/* classification failed; try to use default class */
cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
@@ -586,13 +586,13 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
#endif
} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret)) {
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
cl->qstats.drops++;
}
return ret;
@@ -895,7 +895,7 @@ ok:
if (!sch->q.qlen)
goto fin;
- q->now = ktime_to_ns(ktime_get());
+ q->now = ktime_get_ns();
start_at = jiffies;
next_event = q->now + 5LLU * NSEC_PER_SEC;
@@ -925,14 +925,14 @@ ok:
goto ok;
}
}
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (likely(next_event > q->now)) {
if (!test_bit(__QDISC_STATE_DEACTIVATED,
&qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
ktime_t time = ns_to_ktime(next_event);
qdisc_throttled(q->watchdog.qdisc);
hrtimer_start(&q->watchdog.timer, time,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_PINNED);
}
} else {
schedule_work(&q->work);
@@ -1044,7 +1044,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
INIT_WORK(&q->work, htb_work_func);
- skb_queue_head_init(&q->direct_queue);
+ __skb_queue_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
@@ -1138,15 +1138,16 @@ static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
struct htb_class *cl = (struct htb_class *)arg;
+ __u32 qlen = 0;
if (!cl->level && cl->un.leaf.q)
- cl->qstats.qlen = cl->un.leaf.q->q.qlen;
+ qlen = cl->un.leaf.q->q.qlen;
cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
- if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, &cl->qstats) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
@@ -1225,7 +1226,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
- parent->t_c = ktime_to_ns(ktime_get());
+ parent->t_c = ktime_get_ns();
parent->cmode = HTB_CAN_SEND;
}
@@ -1402,7 +1403,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
goto failure;
if (htb_rate_est || tca[TCA_RATE]) {
- err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_new_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE] ? : &est.nla);
if (err) {
@@ -1455,7 +1457,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
- cl->t_c = ktime_to_ns(ktime_get());
+ cl->t_c = ktime_get_ns();
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
@@ -1464,8 +1466,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
parent->children++;
} else {
if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
+ spinlock_t *lock = qdisc_root_sleeping_lock(sch);
+
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ lock,
tca[TCA_RATE]);
if (err)
return err;
@@ -1519,11 +1524,12 @@ failure:
return err;
}
-static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
+static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
+ unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
+ struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
return fl;
}
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 62871c14e1f9..eb5b8445fef9 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -17,7 +17,7 @@
struct ingress_qdisc_data {
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
};
/* ------------------------- Class/flow operations ------------------------- */
@@ -46,7 +46,8 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
}
-static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
@@ -59,15 +60,16 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
int result;
- result = tc_classify(skb, p->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
qdisc_bstats_update(sch, skb);
switch (result) {
case TC_ACT_SHOT:
result = TC_ACT_SHOT;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
break;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index a8b2864a696b..f3cbaecd283a 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -112,7 +112,6 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.qlen += qdisc->qstats.qlen;
sch->qstats.backlog += qdisc->qstats.backlog;
sch->qstats.drops += qdisc->qstats.drops;
sch->qstats.requeues += qdisc->qstats.requeues;
@@ -200,9 +199,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- sch->qstats.qlen = sch->q.qlen;
- if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, &sch->qstats) < 0)
+ if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
return -1;
return 0;
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6749e2f540d0..3811a745452c 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -231,12 +231,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
memset(&sch->qstats, 0, sizeof(sch->qstats));
for (i = 0; i < dev->num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
sch->bstats.packets += qdisc->bstats.packets;
- sch->qstats.qlen += qdisc->qstats.qlen;
sch->qstats.backlog += qdisc->qstats.backlog;
sch->qstats.drops += qdisc->qstats.drops;
sch->qstats.requeues += qdisc->qstats.requeues;
@@ -327,6 +326,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (cl <= netdev_get_num_tc(dev)) {
int i;
+ __u32 qlen = 0;
struct Qdisc *qdisc;
struct gnet_stats_queue qstats = {0};
struct gnet_stats_basic_packed bstats = {0};
@@ -340,11 +340,13 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
spin_unlock_bh(d->lock);
for (i = tc.offset; i < tc.offset + tc.count; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+ struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+
+ qdisc = rtnl_dereference(q->qdisc);
spin_lock_bh(qdisc_lock(qdisc));
+ qlen += qdisc->q.qlen;
bstats.bytes += qdisc->bstats.bytes;
bstats.packets += qdisc->bstats.packets;
- qstats.qlen += qdisc->qstats.qlen;
qstats.backlog += qdisc->qstats.backlog;
qstats.drops += qdisc->qstats.drops;
qstats.requeues += qdisc->qstats.requeues;
@@ -353,16 +355,16 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
}
/* Reclaim root sleeping lock before completing stats */
spin_lock_bh(d->lock);
- if (gnet_stats_copy_basic(d, &bstats) < 0 ||
- gnet_stats_copy_queue(d, &qstats) < 0)
+ if (gnet_stats_copy_basic(d, NULL, &bstats) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
return -1;
} else {
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
- sch->qstats.qlen = sch->q.qlen;
- if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, &sch->qstats) < 0)
+ if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
+ gnet_stats_copy_queue(d, NULL,
+ &sch->qstats, sch->q.qlen) < 0)
return -1;
}
return 0;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index afb050a735fa..42dd218871e0 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -31,7 +31,7 @@ struct multiq_sched_data {
u16 bands;
u16 max_bands;
u16 curband;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc **queues;
};
@@ -42,10 +42,11 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct multiq_sched_data *q = qdisc_priv(sch);
u32 band;
struct tcf_result res;
+ struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, q->filter_list, &res);
+ err = tc_classify(skb, fl, &res);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -74,7 +75,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (qdisc == NULL) {
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
}
@@ -86,7 +87,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
if (net_xmit_drop_count(ret))
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
return ret;
}
@@ -359,9 +360,8 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- cl_q->qstats.qlen = cl_q->q.qlen;
- if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
- gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+ if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
return 0;
@@ -388,7 +388,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct multiq_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 111d70fddaea..b34331967e02 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -429,12 +429,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* Drop packet? */
if (loss_event(q)) {
if (q->ecn && INET_ECN_set_ce(skb))
- sch->qstats.drops++; /* mark packet */
+ qdisc_qstats_drop(sch); /* mark packet */
else
--count;
}
if (count == 0) {
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
@@ -478,7 +478,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
return qdisc_reshape_fail(skb, sch);
- sch->qstats.backlog += qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_inc(sch, skb);
cb = netem_skb_cb(skb);
if (q->gap == 0 || /* not doing reordering */
@@ -549,15 +549,14 @@ static unsigned int netem_drop(struct Qdisc *sch)
sch->q.qlen--;
skb->next = NULL;
skb->prev = NULL;
- len = qdisc_pkt_len(skb);
- sch->qstats.backlog -= len;
+ qdisc_qstats_backlog_dec(sch, skb);
kfree_skb(skb);
}
}
if (!len && q->qdisc && q->qdisc->ops->drop)
len = q->qdisc->ops->drop(q->qdisc);
if (len)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
return len;
}
@@ -575,7 +574,7 @@ tfifo_dequeue:
skb = __skb_dequeue(&sch->q);
if (skb) {
deliver:
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
@@ -610,7 +609,7 @@ deliver:
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
qdisc_tree_decrease_qlen(sch, 1);
}
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index fefeeb73f15f..33d7a98a7a97 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -232,7 +232,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 79359b69ad8d..8e5cd34aaa74 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -24,7 +24,7 @@
struct prio_sched_data {
int bands;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
};
@@ -36,11 +36,13 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct prio_sched_data *q = qdisc_priv(sch);
u32 band = skb->priority;
struct tcf_result res;
+ struct tcf_proto *fl;
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
- err = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ err = tc_classify(skb, fl, &res);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -50,7 +52,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return NULL;
}
#endif
- if (!q->filter_list || err < 0) {
+ if (!fl || err < 0) {
if (TC_H_MAJ(band))
band = 0;
return q->queues[q->prio2band[band & TC_PRIO_MAX]];
@@ -75,7 +77,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (qdisc == NULL) {
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
}
@@ -87,7 +89,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
if (net_xmit_drop_count(ret))
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
return ret;
}
@@ -322,9 +324,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct Qdisc *cl_q;
cl_q = q->queues[cl - 1];
- cl_q->qstats.qlen = cl_q->q.qlen;
- if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
- gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+ if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
return -1;
return 0;
@@ -351,7 +352,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct prio_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 8056fb4e618a..3ec7e88a43ca 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -181,7 +181,7 @@ struct qfq_group {
};
struct qfq_sched {
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
struct Qdisc_class_hash clhash;
u64 oldV, V; /* Precise virtual times. */
@@ -459,7 +459,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl != NULL) { /* modify existing class */
if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err)
@@ -484,7 +485,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->qdisc = &noop_qdisc;
if (tca[TCA_RATE]) {
- err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ err = gen_new_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err)
@@ -576,7 +578,8 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
qfq_destroy_class(sch, cl);
}
-static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
+ unsigned long cl)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -661,14 +664,14 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct tc_qfq_stats xstats;
memset(&xstats, 0, sizeof(xstats));
- cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
- if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
+ gnet_stats_copy_queue(d, NULL,
+ &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -704,6 +707,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl;
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
@@ -714,7 +718,8 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ fl = rcu_dereference_bh(q->filter_list);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -1224,7 +1229,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cl = qfq_classify(skb, sch, &err);
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return err;
}
@@ -1244,7 +1249,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
pr_debug("qfq_enqueue: enqueue failed %d\n", err);
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
}
return err;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 633e32defdcc..6c0534cc7758 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -74,7 +74,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
case RED_PROB_MARK:
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
q->stats.prob_drop++;
goto congestion_drop;
@@ -84,7 +84,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
case RED_HARD_MARK:
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (red_use_harddrop(q) || !red_use_ecn(q) ||
!INET_ECN_set_ce(skb)) {
q->stats.forced_drop++;
@@ -100,7 +100,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sch->q.qlen++;
} else if (net_xmit_drop_count(ret)) {
q->stats.pdrop++;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
}
return ret;
@@ -142,7 +142,7 @@ static unsigned int red_drop(struct Qdisc *sch)
if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
q->stats.other++;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
sch->q.qlen--;
return len;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 9b0f7093d970..5819dd82630d 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -55,7 +55,7 @@ struct sfb_bins {
struct sfb_sched_data {
struct Qdisc *qdisc;
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
unsigned long rehash_interval;
unsigned long warmup_time; /* double buffering warmup time in jiffies */
u32 max;
@@ -253,13 +253,13 @@ static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
return false;
}
-static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
+static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
int *qerr, u32 *salt)
{
struct tcf_result res;
int result;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -281,6 +281,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
+ struct tcf_proto *fl;
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
@@ -289,7 +290,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct flow_keys keys;
if (unlikely(sch->q.qlen >= q->limit)) {
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
q->stats.queuedrop++;
goto drop;
}
@@ -306,9 +307,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
- if (q->filter_list) {
+ fl = rcu_dereference_bh(q->filter_list);
+ if (fl) {
/* If using external classifiers, get result and record it. */
- if (!sfb_classify(skb, q, &ret, &salt))
+ if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
keys.src = salt;
keys.dst = 0;
@@ -346,7 +348,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sfb_skb_cb(skb)->hashes[slot] = 0;
if (unlikely(minqlen >= q->max)) {
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
q->stats.bucketdrop++;
goto drop;
}
@@ -374,7 +376,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
if (sfb_rate_limit(skb, q)) {
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
q->stats.penaltydrop++;
goto drop;
}
@@ -409,7 +411,7 @@ enqueue:
increment_qlen(skb, q);
} else if (net_xmit_drop_count(ret)) {
q->stats.childdrop++;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
}
return ret;
@@ -418,7 +420,7 @@ drop:
return NET_XMIT_CN;
other_drop:
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
}
@@ -660,7 +662,8 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct sfb_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 1af2f73906d0..b877140beda5 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -125,7 +125,7 @@ struct sfq_sched_data {
u8 cur_depth; /* depth of longest slot */
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
- struct tcf_proto *filter_list;
+ struct tcf_proto __rcu *filter_list;
sfq_index *ht; /* Hash table ('divisor' slots) */
struct sfq_slot *slots; /* Flows table ('maxflows' entries) */
@@ -187,6 +187,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tcf_result res;
+ struct tcf_proto *fl;
int result;
if (TC_H_MAJ(skb->priority) == sch->handle &&
@@ -194,13 +195,14 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->divisor)
return TC_H_MIN(skb->priority);
- if (!q->filter_list) {
+ fl = rcu_dereference_bh(q->filter_list);
+ if (!fl) {
skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
return sfq_hash(q, skb) + 1;
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, q->filter_list, &res);
+ result = tc_classify(skb, fl, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -310,11 +312,6 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
slot->skblist_prev = skb;
}
-#define slot_queue_walk(slot, skb) \
- for (skb = slot->skblist_next; \
- skb != (struct sk_buff *)slot; \
- skb = skb->next)
-
static unsigned int sfq_drop(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
@@ -334,8 +331,8 @@ drop:
sfq_dec(q, x);
kfree_skb(skb);
sch->q.qlen--;
- sch->qstats.drops++;
- sch->qstats.backlog -= len;
+ qdisc_qstats_drop(sch);
+ qdisc_qstats_backlog_dec(sch, skb);
return len;
}
@@ -382,7 +379,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
hash = sfq_classify(skb, sch, &ret);
if (hash == 0) {
if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
kfree_skb(skb);
return ret;
}
@@ -412,7 +409,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
case RED_PROB_MARK:
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (sfq_prob_mark(q)) {
/* We know we have at least one packet in queue */
if (sfq_headdrop(q) &&
@@ -429,7 +426,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
goto congestion_drop;
case RED_HARD_MARK:
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
if (sfq_hard_mark(q)) {
/* We know we have at least one packet in queue */
if (sfq_headdrop(q) &&
@@ -464,7 +461,7 @@ congestion_drop:
}
enqueue:
- sch->qstats.backlog += qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_inc(sch, skb);
slot->backlog += qdisc_pkt_len(skb);
slot_queue_add(slot, skb);
sfq_inc(q, x);
@@ -523,7 +520,7 @@ next_slot:
sfq_dec(q, a);
qdisc_bstats_update(sch, skb);
sch->q.qlen--;
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
slot->backlog -= qdisc_pkt_len(skb);
/* Is the slot empty? */
if (slot->qlen == 0) {
@@ -589,7 +586,8 @@ static void sfq_rehash(struct Qdisc *sch)
if (x == SFQ_EMPTY_SLOT) {
x = q->dep[0].next; /* get a free slot */
if (x >= SFQ_MAX_FLOWS) {
-drop: sch->qstats.backlog -= qdisc_pkt_len(skb);
+drop:
+ qdisc_qstats_backlog_dec(sch, skb);
kfree_skb(skb);
dropped++;
continue;
@@ -841,7 +839,8 @@ static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
{
struct sfq_sched_data *q = qdisc_priv(sch);
@@ -872,7 +871,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
qs.qlen = slot->qlen;
qs.backlog = slot->backlog;
}
- if (gnet_stats_copy_queue(d, &qs) < 0)
+ if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 18ff63433709..a4afde14e865 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -175,7 +175,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
ret = qdisc_enqueue(segs, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
} else {
nb++;
}
@@ -201,7 +201,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
ret = qdisc_enqueue(skb, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
return ret;
}
@@ -216,7 +216,7 @@ static unsigned int tbf_drop(struct Qdisc *sch)
if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
sch->q.qlen--;
- sch->qstats.drops++;
+ qdisc_qstats_drop(sch);
}
return len;
}
@@ -239,7 +239,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
s64 ptoks = 0;
unsigned int len = qdisc_pkt_len(skb);
- now = ktime_to_ns(ktime_get());
+ now = ktime_get_ns();
toks = min_t(s64, now - q->t_c, q->buffer);
if (tbf_peak_present(q)) {
@@ -268,7 +268,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
}
qdisc_watchdog_schedule_ns(&q->watchdog,
- now + max_t(long, -toks, -ptoks));
+ now + max_t(long, -toks, -ptoks),
+ true);
/* Maybe we have a shorter packet in the queue,
which can be sent now. It sounds cool,
@@ -281,7 +282,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
(cf. CSZ, HPFQ, HFSC)
*/
- sch->qstats.overlimits++;
+ qdisc_qstats_overlimit(sch);
}
return NULL;
}
@@ -292,7 +293,7 @@ static void tbf_reset(struct Qdisc *sch)
qdisc_reset(q->qdisc);
sch->q.qlen = 0;
- q->t_c = ktime_to_ns(ktime_get());
+ q->t_c = ktime_get_ns();
q->tokens = q->buffer;
q->ptokens = q->mtu;
qdisc_watchdog_cancel(&q->watchdog);
@@ -431,7 +432,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- q->t_c = ktime_to_ns(ktime_get());
+ q->t_c = ktime_get_ns();
qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = &noop_qdisc;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 474167162947..6ada42396a24 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch)
struct teql_sched_data *dat = qdisc_priv(sch);
struct netdev_queue *dat_queue;
struct sk_buff *skb;
+ struct Qdisc *q;
skb = __skb_dequeue(&dat->q);
dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
+ q = rcu_dereference_bh(dat_queue->qdisc);
+
if (skb == NULL) {
- struct net_device *m = qdisc_dev(dat_queue->qdisc);
+ struct net_device *m = qdisc_dev(q);
if (m) {
dat->m->slaves = sch;
netif_wake_queue(m);
@@ -108,7 +111,7 @@ teql_dequeue(struct Qdisc *sch)
} else {
qdisc_bstats_update(sch, skb);
}
- sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
+ sch->q.qlen = dat->q.qlen + q->q.qlen;
return skb;
}
@@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch)
txq = netdev_get_tx_queue(master->dev, 0);
master->slaves = NULL;
- root_lock = qdisc_root_sleeping_lock(txq->qdisc);
+ root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
spin_lock_bh(root_lock);
- qdisc_reset(txq->qdisc);
+ qdisc_reset(rtnl_dereference(txq->qdisc));
spin_unlock_bh(root_lock);
}
}
@@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb,
struct dst_entry *dst = skb_dst(skb);
int res;
- if (txq->qdisc == &noop_qdisc)
+ if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
return -ENODEV;
if (!dev->header_ops || !dst)
@@ -301,7 +304,6 @@ restart:
do {
struct net_device *slave = qdisc_dev(q);
struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
- const struct net_device_ops *slave_ops = slave->netdev_ops;
if (slave_txq->qdisc_sleeping != q)
continue;
@@ -317,8 +319,8 @@ restart:
unsigned int length = qdisc_pkt_len(skb);
if (!netif_xmit_frozen_or_stopped(slave_txq) &&
- slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
- txq_trans_update(slave_txq);
+ netdev_start_xmit(skb, slave, slave_txq, false) ==
+ NETDEV_TX_OK) {
__netif_tx_unlock(slave_txq);
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);
@@ -468,7 +470,7 @@ static __init void teql_master_setup(struct net_device *dev)
dev->tx_queue_len = 100;
dev->flags = IFF_NOARP;
dev->hard_header_len = LL_MAX_HEADER;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
}
static LIST_HEAD(master_dev_list);
@@ -485,8 +487,8 @@ static int __init teql_init(void)
struct net_device *dev;
struct teql_master *master;
- dev = alloc_netdev(sizeof(struct teql_master),
- "teql%d", teql_master_setup);
+ dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
+ NET_NAME_UNKNOWN, teql_master_setup);
if (!dev) {
err = -ENOMEM;
break;
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 5c30b7a873df..3b4ffb021cf1 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
protocol.o endpointola.o associola.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \
- inqueue.o outqueue.o ulpqueue.o command.o \
+ inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o ssnmap.o auth.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 06a9ee6b2d3a..a88b8524846e 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -813,6 +813,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
else {
dst_release(transport->dst);
transport->dst = NULL;
+ ulp_notify = false;
}
spc_state = SCTP_ADDR_UNREACHABLE;
@@ -1244,7 +1245,7 @@ static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
{
u8 score_curr, score_best;
- if (best == NULL)
+ if (best == NULL || curr == best)
return curr;
score_curr = sctp_trans_score(curr);
@@ -1355,14 +1356,11 @@ static void sctp_select_active_and_retran_path(struct sctp_association *asoc)
trans_sec = trans_pri;
/* If we failed to find a usable transport, just camp on the
- * primary or retran, even if they are inactive, if possible
- * pick a PF iff it's the better choice.
+ * active or pick a PF iff it's the better choice.
*/
if (trans_pri == NULL) {
- trans_pri = sctp_trans_elect_best(asoc->peer.primary_path,
- asoc->peer.retran_path);
- trans_pri = sctp_trans_elect_best(trans_pri, trans_pf);
- trans_sec = asoc->peer.primary_path;
+ trans_pri = sctp_trans_elect_best(asoc->peer.active_path, trans_pf);
+ trans_sec = trans_pri;
}
/* Set the active and retran transports. */
diff --git a/net/sctp/command.c b/net/sctp/command.c
deleted file mode 100644
index dd7375851618..000000000000
--- a/net/sctp/command.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SCTP kernel implementation Copyright (C) 1999-2001
- * Cisco, Motorola, and IBM
- * Copyright 2001 La Monte H.P. Yarroll
- *
- * This file is part of the SCTP kernel implementation
- *
- * These functions manipulate sctp command sequences.
- *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- * ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING. If not, see
- * <http://www.gnu.org/licenses/>.
- *
- * Please send any bug reports or fixes you make to the
- * email address(es):
- * lksctp developers <linux-sctp@vger.kernel.org>
- *
- * Written or modified by:
- * La Monte H.P. Yarroll <piggy@acm.org>
- * Karl Knutson <karl@athena.chicago.il.us>
- */
-
-#include <linux/types.h>
-#include <net/sctp/sctp.h>
-#include <net/sctp/sm.h>
-
-/* Initialize a block of memory as a command sequence. */
-int sctp_init_cmd_seq(sctp_cmd_seq_t *seq)
-{
- memset(seq, 0, sizeof(sctp_cmd_seq_t));
- return 1; /* We always succeed. */
-}
-
-/* Add a command to a sctp_cmd_seq_t.
- * Return 0 if the command sequence is full.
- */
-void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, sctp_arg_t obj)
-{
- BUG_ON(seq->next_free_slot >= SCTP_MAX_NUM_COMMANDS);
-
- seq->cmds[seq->next_free_slot].verb = verb;
- seq->cmds[seq->next_free_slot++].obj = obj;
-}
-
-/* Return the next command structure in a sctp_cmd_seq.
- * Returns NULL at the end of the sequence.
- */
-sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq)
-{
- sctp_cmd_t *retval = NULL;
-
- if (seq->next_cmd < seq->next_free_slot)
- retval = &seq->cmds[seq->next_cmd++];
-
- return retval;
-}
-
diff --git a/net/sctp/input.c b/net/sctp/input.c
index f2e2cbd2d750..b6493b3f11a9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -133,9 +133,13 @@ int sctp_rcv(struct sk_buff *skb)
__skb_pull(skb, skb_transport_offset(skb));
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
- if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
- sctp_rcv_checksum(net, skb) < 0)
+
+ skb->csum_valid = 0; /* Previous value not applicable */
+ if (skb_csum_unnecessary(skb))
+ __skb_decr_checksum_unnecessary(skb);
+ else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
+ skb->csum_valid = 1;
skb_pull(skb, sizeof(struct sctphdr));
@@ -575,11 +579,6 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
int err;
struct net *net = dev_net(skb->dev);
- if (skb->len < ihlen + 8) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
- return;
- }
-
/* Fix up skb to look at the embedded net header. */
saveip = skb->network_header;
savesctp = skb->transport_header;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 1999592ba88c..0e4198ee2370 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -434,7 +434,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
/* Initialize sk->sk_rcv_saddr from sctp_addr. */
static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
{
- if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
+ if (addr->sa.sa_family == AF_INET) {
sk->sk_v6_rcv_saddr.s6_addr32[0] = 0;
sk->sk_v6_rcv_saddr.s6_addr32[1] = 0;
sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
@@ -448,7 +448,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
/* Initialize sk->sk_daddr from sctp_addr. */
static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
{
- if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
+ if (addr->sa.sa_family == AF_INET) {
sk->sk_v6_daddr.s6_addr32[0] = 0;
sk->sk_v6_daddr.s6_addr32[1] = 0;
sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff);
@@ -556,8 +556,6 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
if (IPV6_ADDR_ANY == type)
return 1;
if (type == IPV6_ADDR_MAPPED) {
- if (sp && !sp->v4mapped)
- return 0;
if (sp && ipv6_only_sock(sctp_opt2sk(sp)))
return 0;
sctp_v6_map_v4(addr);
@@ -587,8 +585,6 @@ static int sctp_v6_addr_valid(union sctp_addr *addr,
/* Note: This routine is used in input, so v4-mapped-v6
* are disallowed here when there is no sctp_sock.
*/
- if (!sp || !sp->v4mapped)
- return 0;
if (sp && ipv6_only_sock(sctp_opt2sk(sp)))
return 0;
sctp_v6_map_v4(addr);
@@ -675,11 +671,23 @@ out:
return newsk;
}
-/* Map v4 address to mapped v6 address */
-static void sctp_v6_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr)
+/* Format a sockaddr for return to user space. This makes sure the return is
+ * AF_INET or AF_INET6 depending on the SCTP_I_WANT_MAPPED_V4_ADDR option.
+ */
+static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
{
- if (sp->v4mapped && AF_INET == addr->sa.sa_family)
- sctp_v4_map_v6(addr);
+ if (sp->v4mapped) {
+ if (addr->sa.sa_family == AF_INET)
+ sctp_v4_map_v6(addr);
+ } else {
+ if (addr->sa.sa_family == AF_INET6 &&
+ ipv6_addr_v4mapped(&addr->v6.sin6_addr))
+ sctp_v6_map_v4(addr);
+ }
+
+ if (addr->sa.sa_family == AF_INET)
+ return sizeof(struct sockaddr_in);
+ return sizeof(struct sockaddr_in6);
}
/* Where did this skb come from? */
@@ -706,82 +714,68 @@ static void sctp_v6_ecn_capable(struct sock *sk)
inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
}
-/* Initialize a PF_INET6 socket msg_name. */
-static void sctp_inet6_msgname(char *msgname, int *addr_len)
-{
- struct sockaddr_in6 *sin6;
-
- sin6 = (struct sockaddr_in6 *)msgname;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0; /*FIXME */
- *addr_len = sizeof(struct sockaddr_in6);
-}
-
/* Initialize a PF_INET msgname from a ulpevent. */
static void sctp_inet6_event_msgname(struct sctp_ulpevent *event,
char *msgname, int *addrlen)
{
- struct sockaddr_in6 *sin6, *sin6from;
-
- if (msgname) {
- union sctp_addr *addr;
- struct sctp_association *asoc;
-
- asoc = event->asoc;
- sctp_inet6_msgname(msgname, addrlen);
- sin6 = (struct sockaddr_in6 *)msgname;
- sin6->sin6_port = htons(asoc->peer.port);
- addr = &asoc->peer.primary_addr;
+ union sctp_addr *addr;
+ struct sctp_association *asoc;
+ union sctp_addr *paddr;
- /* Note: If we go to a common v6 format, this code
- * will change.
- */
+ if (!msgname)
+ return;
- /* Map ipv4 address into v4-mapped-on-v6 address. */
- if (sctp_sk(asoc->base.sk)->v4mapped &&
- AF_INET == addr->sa.sa_family) {
- sctp_v4_map_v6((union sctp_addr *)sin6);
- sin6->sin6_addr.s6_addr32[3] =
- addr->v4.sin_addr.s_addr;
- return;
- }
+ addr = (union sctp_addr *)msgname;
+ asoc = event->asoc;
+ paddr = &asoc->peer.primary_addr;
- sin6from = &asoc->peer.primary_addr.v6;
- sin6->sin6_addr = sin6from->sin6_addr;
- if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = sin6from->sin6_scope_id;
+ if (paddr->sa.sa_family == AF_INET) {
+ addr->v4.sin_family = AF_INET;
+ addr->v4.sin_port = htons(asoc->peer.port);
+ addr->v4.sin_addr = paddr->v4.sin_addr;
+ } else {
+ addr->v6.sin6_family = AF_INET6;
+ addr->v6.sin6_flowinfo = 0;
+ if (ipv6_addr_type(&paddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ addr->v6.sin6_scope_id = paddr->v6.sin6_scope_id;
+ else
+ addr->v6.sin6_scope_id = 0;
+ addr->v6.sin6_port = htons(asoc->peer.port);
+ addr->v6.sin6_addr = paddr->v6.sin6_addr;
}
+
+ *addrlen = sctp_v6_addr_to_user(sctp_sk(asoc->base.sk), addr);
}
/* Initialize a msg_name from an inbound skb. */
static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
int *addr_len)
{
+ union sctp_addr *addr;
struct sctphdr *sh;
- struct sockaddr_in6 *sin6;
-
- if (msgname) {
- sctp_inet6_msgname(msgname, addr_len);
- sin6 = (struct sockaddr_in6 *)msgname;
- sh = sctp_hdr(skb);
- sin6->sin6_port = sh->source;
-
- /* Map ipv4 address into v4-mapped-on-v6 address. */
- if (sctp_sk(skb->sk)->v4mapped &&
- ip_hdr(skb)->version == 4) {
- sctp_v4_map_v6((union sctp_addr *)sin6);
- sin6->sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
- return;
- }
- /* Otherwise, just copy the v6 address. */
- sin6->sin6_addr = ipv6_hdr(skb)->saddr;
- if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+ if (!msgname)
+ return;
+
+ addr = (union sctp_addr *)msgname;
+ sh = sctp_hdr(skb);
+
+ if (ip_hdr(skb)->version == 4) {
+ addr->v4.sin_family = AF_INET;
+ addr->v4.sin_port = sh->source;
+ addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ } else {
+ addr->v6.sin6_family = AF_INET6;
+ addr->v6.sin6_flowinfo = 0;
+ addr->v6.sin6_port = sh->source;
+ addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
+ if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
struct sctp_ulpevent *ev = sctp_skb2event(skb);
- sin6->sin6_scope_id = ev->iif;
+ addr->v6.sin6_scope_id = ev->iif;
}
}
+
+ *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
}
/* Do we support this AF? */
@@ -857,9 +851,6 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
return 0;
}
rcu_read_unlock();
- } else if (type == IPV6_ADDR_MAPPED) {
- if (!opt->v4mapped)
- return 0;
}
af = opt->pf->af;
@@ -914,6 +905,23 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
return 1;
}
+/* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
+static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
+ int *uaddr_len, int peer)
+{
+ int rc;
+
+ rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+
+ if (rc != 0)
+ return rc;
+
+ *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+ (union sctp_addr *)uaddr);
+
+ return rc;
+}
+
static const struct proto_ops inet6_seqpacket_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
@@ -922,7 +930,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
.connect = inet_dgram_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
- .getname = inet6_getname,
+ .getname = sctp_getname,
.poll = sctp_poll,
.ioctl = inet6_ioctl,
.listen = sctp_inet_listen,
@@ -974,8 +982,6 @@ static struct sctp_af sctp_af_inet6 = {
.copy_addrlist = sctp_v6_copy_addrlist,
.from_skb = sctp_v6_from_skb,
.from_sk = sctp_v6_from_sk,
- .to_sk_saddr = sctp_v6_to_sk_saddr,
- .to_sk_daddr = sctp_v6_to_sk_daddr,
.from_addr_param = sctp_v6_from_addr_param,
.to_addr_param = sctp_v6_to_addr_param,
.cmp_addr = sctp_v6_cmp_addr,
@@ -1005,7 +1011,9 @@ static struct sctp_pf sctp_pf_inet6 = {
.send_verify = sctp_inet6_send_verify,
.supported_addrs = sctp_inet6_supported_addrs,
.create_accept_sk = sctp_v6_create_accept_sk,
- .addr_v4map = sctp_v6_addr_v4map,
+ .addr_to_user = sctp_v6_addr_to_user,
+ .to_sk_saddr = sctp_v6_to_sk_saddr,
+ .to_sk_daddr = sctp_v6_to_sk_daddr,
.af = &sctp_af_inet6,
};
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 01ab8e0723f0..42dffd428389 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -178,7 +178,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
case SCTP_XMIT_RWND_FULL:
case SCTP_XMIT_OK:
- case SCTP_XMIT_NAGLE_DELAY:
+ case SCTP_XMIT_DELAY:
break;
}
@@ -599,7 +599,7 @@ out:
return err;
no_route:
kfree_skb(nskb);
- IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
/* FIXME: Returning the 'err' will effect all the associations
* associated with a socket, although only one of the paths of the
@@ -633,7 +633,6 @@ nomem:
static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
struct sctp_chunk *chunk)
{
- sctp_xmit_t retval = SCTP_XMIT_OK;
size_t datasize, rwnd, inflight, flight_size;
struct sctp_transport *transport = packet->transport;
struct sctp_association *asoc = transport->asoc;
@@ -658,15 +657,11 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
datasize = sctp_data_size(chunk);
- if (datasize > rwnd) {
- if (inflight > 0) {
- /* We have (at least) one data chunk in flight,
- * so we can't fall back to rule 6.1 B).
- */
- retval = SCTP_XMIT_RWND_FULL;
- goto finish;
- }
- }
+ if (datasize > rwnd && inflight > 0)
+ /* We have (at least) one data chunk in flight,
+ * so we can't fall back to rule 6.1 B).
+ */
+ return SCTP_XMIT_RWND_FULL;
/* RFC 2960 6.1 Transmission of DATA Chunks
*
@@ -680,36 +675,44 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
* When a Fast Retransmit is being performed the sender SHOULD
* ignore the value of cwnd and SHOULD NOT delay retransmission.
*/
- if (chunk->fast_retransmit != SCTP_NEED_FRTX)
- if (flight_size >= transport->cwnd) {
- retval = SCTP_XMIT_RWND_FULL;
- goto finish;
- }
+ if (chunk->fast_retransmit != SCTP_NEED_FRTX &&
+ flight_size >= transport->cwnd)
+ return SCTP_XMIT_RWND_FULL;
/* Nagle's algorithm to solve small-packet problem:
* Inhibit the sending of new chunks when new outgoing data arrives
* if any previously transmitted data on the connection remains
* unacknowledged.
*/
- if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) &&
- inflight && sctp_state(asoc, ESTABLISHED)) {
- unsigned int max = transport->pathmtu - packet->overhead;
- unsigned int len = chunk->skb->len + q->out_qlen;
-
- /* Check whether this chunk and all the rest of pending
- * data will fit or delay in hopes of bundling a full
- * sized packet.
- * Don't delay large message writes that may have been
- * fragmeneted into small peices.
- */
- if ((len < max) && chunk->msg->can_delay) {
- retval = SCTP_XMIT_NAGLE_DELAY;
- goto finish;
- }
- }
-finish:
- return retval;
+ if (sctp_sk(asoc->base.sk)->nodelay)
+ /* Nagle disabled */
+ return SCTP_XMIT_OK;
+
+ if (!sctp_packet_empty(packet))
+ /* Append to packet */
+ return SCTP_XMIT_OK;
+
+ if (inflight == 0)
+ /* Nothing unacked */
+ return SCTP_XMIT_OK;
+
+ if (!sctp_state(asoc, ESTABLISHED))
+ return SCTP_XMIT_OK;
+
+ /* Check whether this chunk and all the rest of pending data will fit
+ * or delay in hopes of bundling a full sized packet.
+ */
+ if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead)
+ /* Enough data queued to fill a packet */
+ return SCTP_XMIT_OK;
+
+ /* Don't delay large message writes that may have been fragmented */
+ if (!chunk->msg->can_delay)
+ return SCTP_XMIT_OK;
+
+ /* Defer until all data acked or packet full */
+ return SCTP_XMIT_DELAY;
}
/* This private function does management things when adding DATA chunk */
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 9c77947c0597..7e8f0a117106 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -629,7 +629,7 @@ redo:
done = 1;
break;
- case SCTP_XMIT_NAGLE_DELAY:
+ case SCTP_XMIT_DELAY:
/* Send this packet. */
error = sctp_packet_transmit(pkt);
@@ -1015,7 +1015,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
switch (status) {
case SCTP_XMIT_PMTU_FULL:
case SCTP_XMIT_RWND_FULL:
- case SCTP_XMIT_NAGLE_DELAY:
+ case SCTP_XMIT_DELAY:
/* We could not append this chunk, so put
* the chunk back on the output queue.
*/
@@ -1025,7 +1025,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
sctp_outq_head_data(q, chunk);
goto sctp_flush_out;
- break;
case SCTP_XMIT_OK:
/* The sender is in the SHUTDOWN-PENDING state,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6789d785e698..8f34b27d5775 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -366,7 +366,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !sysctl_ip_nonlocal_bind)
+ !net->ipv4.sysctl_ip_nonlocal_bind)
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
@@ -576,10 +576,10 @@ out:
return newsk;
}
-/* Map address, empty for v4 family */
-static void sctp_v4_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr)
+static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
{
- /* Empty */
+ /* No address mapping for V4 sockets */
+ return sizeof(struct sockaddr_in);
}
/* Dump the v4 addr to the seq file. */
@@ -976,7 +976,9 @@ static struct sctp_pf sctp_pf_inet = {
.send_verify = sctp_inet_send_verify,
.supported_addrs = sctp_inet_supported_addrs,
.create_accept_sk = sctp_v4_create_accept_sk,
- .addr_v4map = sctp_v4_addr_v4map,
+ .addr_to_user = sctp_v4_addr_to_user,
+ .to_sk_saddr = sctp_v4_to_sk_saddr,
+ .to_sk_daddr = sctp_v4_to_sk_daddr,
.af = &sctp_af_inet
};
@@ -1047,8 +1049,6 @@ static struct sctp_af sctp_af_inet = {
.copy_addrlist = sctp_v4_copy_addrlist,
.from_skb = sctp_v4_from_skb,
.from_sk = sctp_v4_from_sk,
- .to_sk_saddr = sctp_v4_to_sk_saddr,
- .to_sk_daddr = sctp_v4_to_sk_daddr,
.from_addr_param = sctp_v4_from_addr_param,
.to_addr_param = sctp_v4_to_addr_param,
.cmp_addr = sctp_v4_cmp_addr,
@@ -1341,7 +1341,7 @@ static __init int sctp_init(void)
if (!sctp_chunk_cachep)
goto err_chunk_cachep;
- status = percpu_counter_init(&sctp_sockets_allocated, 0);
+ status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL);
if (status)
goto err_percpu_counter_init;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5170a1ff95a1..c8f606324134 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1775,9 +1775,22 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
- sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
- SCTP_STATE(SCTP_STATE_ESTABLISHED));
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ if (sctp_state(asoc, SHUTDOWN_PENDING) &&
+ (sctp_sstate(asoc->base.sk, CLOSING) ||
+ sock_flag(asoc->base.sk, SOCK_DEAD))) {
+ /* if were currently in SHUTDOWN_PENDING, but the socket
+ * has been closed by user, don't transition to ESTABLISHED.
+ * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
+ */
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
+ SCTP_ST_CHUNK(0), NULL,
+ commands);
+ } else {
+ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+ SCTP_STATE(SCTP_STATE_ESTABLISHED));
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ }
return SCTP_DISPOSITION_CONSUME;
nomem_ev:
@@ -4182,7 +4195,6 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
case SCTP_CID_ACTION_DISCARD:
/* Discard the packet. */
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- break;
case SCTP_CID_ACTION_DISCARD_ERR:
/* Generate an ERROR chunk as response. */
hdr = unk_chunk->chunk_hdr;
@@ -4198,11 +4210,9 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
/* Discard the packet. */
sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
return SCTP_DISPOSITION_CONSUME;
- break;
case SCTP_CID_ACTION_SKIP:
/* Skip the chunk. */
return SCTP_DISPOSITION_DISCARD;
- break;
case SCTP_CID_ACTION_SKIP_ERR:
/* Generate an ERROR chunk as response. */
hdr = unk_chunk->chunk_hdr;
@@ -4216,7 +4226,6 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
}
/* Skip the chunk. */
return SCTP_DISPOSITION_CONSUME;
- break;
default:
break;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 429899689408..634a2abb5f3a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -254,7 +254,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
if (id_asoc && (id_asoc != addr_asoc))
return NULL;
- sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
+ sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk),
(union sctp_addr *)addr);
return transport;
@@ -396,7 +396,7 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
/* Copy back into socket for getsockname() use. */
if (!ret) {
inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num);
- af->to_sk_saddr(addr, sk);
+ sp->pf->to_sk_saddr(addr, sk);
}
return ret;
@@ -1053,7 +1053,6 @@ static int __sctp_connect(struct sock *sk,
struct sctp_association *asoc2;
struct sctp_transport *transport;
union sctp_addr to;
- struct sctp_af *af;
sctp_scope_t scope;
long timeo;
int err = 0;
@@ -1081,6 +1080,8 @@ static int __sctp_connect(struct sock *sk,
/* Walk through the addrs buffer and count the number of addresses. */
addr_buf = kaddrs;
while (walk_size < addrs_size) {
+ struct sctp_af *af;
+
if (walk_size + sizeof(sa_family_t) > addrs_size) {
err = -EINVAL;
goto out_free;
@@ -1205,8 +1206,7 @@ static int __sctp_connect(struct sock *sk,
/* Initialize sk's dport and daddr for getpeername() */
inet_sk(sk)->inet_dport = htons(asoc->peer.port);
- af = sctp_get_af_specific(sa_addr->sa.sa_family);
- af->to_sk_daddr(sa_addr, sk);
+ sp->pf->to_sk_daddr(sa_addr, sk);
sk->sk_err = 0;
/* in-kernel sockets don't generally have a file allocated to them
@@ -1602,12 +1602,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct sctp_initmsg *sinit;
sctp_assoc_t associd = 0;
sctp_cmsgs_t cmsgs = { NULL };
- int err;
sctp_scope_t scope;
- long timeo;
- __u16 sinfo_flags = 0;
+ bool fill_sinfo_ttl = false;
struct sctp_datamsg *datamsg;
int msg_flags = msg->msg_flags;
+ __u16 sinfo_flags = 0;
+ long timeo;
+ int err;
err = 0;
sp = sctp_sk(sk);
@@ -1648,10 +1649,21 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
msg_name = msg->msg_name;
}
- sinfo = cmsgs.info;
sinit = cmsgs.init;
+ if (cmsgs.sinfo != NULL) {
+ memset(&default_sinfo, 0, sizeof(default_sinfo));
+ default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
+ default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
+ default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
+ default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
+ default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
- /* Did the user specify SNDRCVINFO? */
+ sinfo = &default_sinfo;
+ fill_sinfo_ttl = true;
+ } else {
+ sinfo = cmsgs.srinfo;
+ }
+ /* Did the user specify SNDINFO/SNDRCVINFO? */
if (sinfo) {
sinfo_flags = sinfo->sinfo_flags;
associd = sinfo->sinfo_assoc_id;
@@ -1858,8 +1870,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
pr_debug("%s: we have a valid association\n", __func__);
if (!sinfo) {
- /* If the user didn't specify SNDRCVINFO, make up one with
- * some defaults.
+ /* If the user didn't specify SNDINFO/SNDRCVINFO, make up
+ * one with some defaults.
*/
memset(&default_sinfo, 0, sizeof(default_sinfo));
default_sinfo.sinfo_stream = asoc->default_stream;
@@ -1868,7 +1880,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
default_sinfo.sinfo_context = asoc->default_context;
default_sinfo.sinfo_timetolive = asoc->default_timetolive;
default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
+
sinfo = &default_sinfo;
+ } else if (fill_sinfo_ttl) {
+ /* In case SNDINFO was specified, we still need to fill
+ * it with a default ttl from the assoc here.
+ */
+ sinfo->sinfo_timetolive = asoc->default_timetolive;
}
/* API 7.1.7, the sndbuf size per association bounds the
@@ -2042,8 +2060,6 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
* flags - flags sent or received with the user message, see Section
* 5 for complete description of the flags.
*/
-static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
-
static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len)
@@ -2094,9 +2110,16 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
sp->pf->skb_msgname(skb, msg->msg_name, addr_len);
}
+ /* Check if we allow SCTP_NXTINFO. */
+ if (sp->recvnxtinfo)
+ sctp_ulpevent_read_nxtinfo(event, msg, sk);
+ /* Check if we allow SCTP_RCVINFO. */
+ if (sp->recvrcvinfo)
+ sctp_ulpevent_read_rcvinfo(event, msg);
/* Check if we allow SCTP_SNDRCVINFO. */
if (sp->subscribe.sctp_data_io_event)
sctp_ulpevent_read_sndrcvinfo(event, msg);
+
#if 0
/* FIXME: we should be calling IP/IPv6 layers. */
if (sk->sk_protinfo.af_inet.cmsg_flags)
@@ -2182,8 +2205,13 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen))
return -EFAULT;
- /*
- * At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT,
+ if (sctp_sk(sk)->subscribe.sctp_data_io_event)
+ pr_warn_ratelimited(DEPRECATED "%s (pid %d) "
+ "Requested SCTP_SNDRCVINFO event.\n"
+ "Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n",
+ current->comm, task_pid_nr(current));
+
+ /* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT,
* if there is no data to be sent or retransmit, the stack will
* immediately send up this notification.
*/
@@ -2747,19 +2775,22 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct sctp_sndrcvinfo info;
- struct sctp_association *asoc;
struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_association *asoc;
+ struct sctp_sndrcvinfo info;
- if (optlen != sizeof(struct sctp_sndrcvinfo))
+ if (optlen != sizeof(info))
return -EINVAL;
if (copy_from_user(&info, optval, optlen))
return -EFAULT;
+ if (info.sinfo_flags &
+ ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_ABORT | SCTP_EOF))
+ return -EINVAL;
asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
-
if (asoc) {
asoc->default_stream = info.sinfo_stream;
asoc->default_flags = info.sinfo_flags;
@@ -2777,6 +2808,44 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
return 0;
}
+/* RFC6458, Section 8.1.31. Set/get Default Send Parameters
+ * (SCTP_DEFAULT_SNDINFO)
+ */
+static int sctp_setsockopt_default_sndinfo(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_association *asoc;
+ struct sctp_sndinfo info;
+
+ if (optlen != sizeof(info))
+ return -EINVAL;
+ if (copy_from_user(&info, optval, optlen))
+ return -EFAULT;
+ if (info.snd_flags &
+ ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_ABORT | SCTP_EOF))
+ return -EINVAL;
+
+ asoc = sctp_id2assoc(sk, info.snd_assoc_id);
+ if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
+ if (asoc) {
+ asoc->default_stream = info.snd_sid;
+ asoc->default_flags = info.snd_flags;
+ asoc->default_ppid = info.snd_ppid;
+ asoc->default_context = info.snd_context;
+ } else {
+ sp->default_stream = info.snd_sid;
+ sp->default_flags = info.snd_flags;
+ sp->default_ppid = info.snd_ppid;
+ sp->default_context = info.snd_context;
+ }
+
+ return 0;
+}
+
/* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
*
* Requests that the local SCTP stack use the enclosed peer address as
@@ -3523,7 +3592,6 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
return 0;
}
-
/*
* SCTP_PEER_ADDR_THLDS
*
@@ -3574,6 +3642,38 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_recvrcvinfo(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ int val;
+
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (get_user(val, (int __user *) optval))
+ return -EFAULT;
+
+ sctp_sk(sk)->recvrcvinfo = (val == 0) ? 0 : 1;
+
+ return 0;
+}
+
+static int sctp_setsockopt_recvnxtinfo(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ int val;
+
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (get_user(val, (int __user *) optval))
+ return -EFAULT;
+
+ sctp_sk(sk)->recvnxtinfo = (val == 0) ? 0 : 1;
+
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3671,6 +3771,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
retval = sctp_setsockopt_default_send_param(sk, optval,
optlen);
break;
+ case SCTP_DEFAULT_SNDINFO:
+ retval = sctp_setsockopt_default_sndinfo(sk, optval, optlen);
+ break;
case SCTP_PRIMARY_ADDR:
retval = sctp_setsockopt_primary_addr(sk, optval, optlen);
break;
@@ -3725,6 +3828,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_PEER_ADDR_THLDS:
retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen);
break;
+ case SCTP_RECVRCVINFO:
+ retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen);
+ break;
+ case SCTP_RECVNXTINFO:
+ retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -3971,6 +4080,9 @@ static int sctp_init_sock(struct sock *sk)
/* Enable Nagle algorithm by default. */
sp->nodelay = 0;
+ sp->recvrcvinfo = 0;
+ sp->recvnxtinfo = 0;
+
/* Enable by default. */
sp->v4mapped = 1;
@@ -4131,7 +4243,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
transport = asoc->peer.primary_path;
status.sstat_assoc_id = sctp_assoc2id(asoc);
- status.sstat_state = asoc->state;
+ status.sstat_state = sctp_assoc_to_state(asoc);
status.sstat_rwnd = asoc->peer.rwnd;
status.sstat_unackdata = asoc->unack_data;
@@ -4143,7 +4255,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,
transport->af_specific->sockaddr_len);
/* Map ipv4 address into v4-mapped-on-v6 address. */
- sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
+ sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk),
(union sctp_addr *)&status.sstat_primary.spinfo_address);
status.sstat_primary.spinfo_state = transport->state;
status.sstat_primary.spinfo_cwnd = transport->cwnd;
@@ -4301,8 +4413,8 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
{
struct sctp_association *asoc = sctp_id2assoc(sk, id);
+ struct sctp_sock *sp = sctp_sk(sk);
struct socket *sock;
- struct sctp_af *af;
int err = 0;
if (!asoc)
@@ -4324,8 +4436,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
/* Make peeled-off sockets more like 1-1 accepted sockets.
* Set the daddr and initialize id to something more random
*/
- af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family);
- af->to_sk_daddr(&asoc->peer.primary_addr, sk);
+ sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
/* Populate the fields of the newsk from the oldsk and migrate the
* asoc to the newsk.
@@ -4709,8 +4820,8 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
list_for_each_entry(from, &asoc->peer.transport_addr_list,
transports) {
memcpy(&temp, &from->ipaddr, sizeof(temp));
- sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
- addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
+ addrlen = sctp_get_pf_specific(sk->sk_family)
+ ->addr_to_user(sp, &temp);
if (space_left < addrlen)
return -ENOMEM;
if (copy_to_user(to, &temp, addrlen))
@@ -4754,9 +4865,9 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
if (!temp.v4.sin_port)
temp.v4.sin_port = htons(port);
- sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
- &temp);
- addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
+ addrlen = sctp_get_pf_specific(sk->sk_family)
+ ->addr_to_user(sctp_sk(sk), &temp);
+
if (space_left < addrlen) {
cnt = -ENOMEM;
break;
@@ -4844,8 +4955,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
*/
list_for_each_entry(addr, &bp->address_list, list) {
memcpy(&temp, &addr->a, sizeof(temp));
- sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
- addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
+ addrlen = sctp_get_pf_specific(sk->sk_family)
+ ->addr_to_user(sp, &temp);
if (space_left < addrlen) {
err = -ENOMEM; /*fixme: right error?*/
goto out;
@@ -4904,7 +5015,7 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len,
memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr,
asoc->peer.primary_path->af_specific->sockaddr_len);
- sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp,
+ sctp_get_pf_specific(sk->sk_family)->addr_to_user(sp,
(union sctp_addr *)&prim.ssp_addr);
if (put_user(len, optlen))
@@ -4964,14 +5075,14 @@ static int sctp_getsockopt_default_send_param(struct sock *sk,
int len, char __user *optval,
int __user *optlen)
{
- struct sctp_sndrcvinfo info;
- struct sctp_association *asoc;
struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_association *asoc;
+ struct sctp_sndrcvinfo info;
- if (len < sizeof(struct sctp_sndrcvinfo))
+ if (len < sizeof(info))
return -EINVAL;
- len = sizeof(struct sctp_sndrcvinfo);
+ len = sizeof(info);
if (copy_from_user(&info, optval, len))
return -EFAULT;
@@ -4979,7 +5090,6 @@ static int sctp_getsockopt_default_send_param(struct sock *sk,
asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
-
if (asoc) {
info.sinfo_stream = asoc->default_stream;
info.sinfo_flags = asoc->default_flags;
@@ -5002,6 +5112,48 @@ static int sctp_getsockopt_default_send_param(struct sock *sk,
return 0;
}
+/* RFC6458, Section 8.1.31. Set/get Default Send Parameters
+ * (SCTP_DEFAULT_SNDINFO)
+ */
+static int sctp_getsockopt_default_sndinfo(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_association *asoc;
+ struct sctp_sndinfo info;
+
+ if (len < sizeof(info))
+ return -EINVAL;
+
+ len = sizeof(info);
+
+ if (copy_from_user(&info, optval, len))
+ return -EFAULT;
+
+ asoc = sctp_id2assoc(sk, info.snd_assoc_id);
+ if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
+ if (asoc) {
+ info.snd_sid = asoc->default_stream;
+ info.snd_flags = asoc->default_flags;
+ info.snd_ppid = asoc->default_ppid;
+ info.snd_context = asoc->default_context;
+ } else {
+ info.snd_sid = sp->default_stream;
+ info.snd_flags = sp->default_flags;
+ info.snd_ppid = sp->default_ppid;
+ info.snd_context = sp->default_context;
+ }
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &info, len))
+ return -EFAULT;
+
+ return 0;
+}
+
/*
*
* 7.1.5 SCTP_NODELAY
@@ -5752,6 +5904,46 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len,
return 0;
}
+static int sctp_getsockopt_recvrcvinfo(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ int val = 0;
+
+ if (len < sizeof(int))
+ return -EINVAL;
+
+ len = sizeof(int);
+ if (sctp_sk(sk)->recvrcvinfo)
+ val = 1;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ int val = 0;
+
+ if (len < sizeof(int))
+ return -EINVAL;
+
+ len = sizeof(int);
+ if (sctp_sk(sk)->recvnxtinfo)
+ val = 1;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -5821,6 +6013,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_default_send_param(sk, len,
optval, optlen);
break;
+ case SCTP_DEFAULT_SNDINFO:
+ retval = sctp_getsockopt_default_sndinfo(sk, len,
+ optval, optlen);
+ break;
case SCTP_PRIMARY_ADDR:
retval = sctp_getsockopt_primary_addr(sk, len, optval, optlen);
break;
@@ -5895,6 +6091,12 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_GET_ASSOC_STATS:
retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen);
break;
+ case SCTP_RECVRCVINFO:
+ retval = sctp_getsockopt_recvrcvinfo(sk, len, optval, optlen);
+ break;
+ case SCTP_RECVNXTINFO:
+ retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6390,8 +6592,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
struct cmsghdr *cmsg;
struct msghdr *my_msg = (struct msghdr *)msg;
- for (cmsg = CMSG_FIRSTHDR(msg);
- cmsg != NULL;
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
cmsg = CMSG_NXTHDR(my_msg, cmsg)) {
if (!CMSG_OK(my_msg, cmsg))
return -EINVAL;
@@ -6404,7 +6605,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
switch (cmsg->cmsg_type) {
case SCTP_INIT:
/* SCTP Socket API Extension
- * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
+ * 5.3.1 SCTP Initiation Structure (SCTP_INIT)
*
* This cmsghdr structure provides information for
* initializing new SCTP associations with sendmsg().
@@ -6416,15 +6617,15 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
* ------------ ------------ ----------------------
* IPPROTO_SCTP SCTP_INIT struct sctp_initmsg
*/
- if (cmsg->cmsg_len !=
- CMSG_LEN(sizeof(struct sctp_initmsg)))
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_initmsg)))
return -EINVAL;
- cmsgs->init = (struct sctp_initmsg *)CMSG_DATA(cmsg);
+
+ cmsgs->init = CMSG_DATA(cmsg);
break;
case SCTP_SNDRCV:
/* SCTP Socket API Extension
- * 5.2.2 SCTP Header Information Structure(SCTP_SNDRCV)
+ * 5.3.2 SCTP Header Information Structure(SCTP_SNDRCV)
*
* This cmsghdr structure specifies SCTP options for
* sendmsg() and describes SCTP header information
@@ -6434,24 +6635,44 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
* ------------ ------------ ----------------------
* IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo
*/
- if (cmsg->cmsg_len !=
- CMSG_LEN(sizeof(struct sctp_sndrcvinfo)))
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndrcvinfo)))
return -EINVAL;
- cmsgs->info =
- (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+ cmsgs->srinfo = CMSG_DATA(cmsg);
- /* Minimally, validate the sinfo_flags. */
- if (cmsgs->info->sinfo_flags &
+ if (cmsgs->srinfo->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
+ case SCTP_SNDINFO:
+ /* SCTP Socket API Extension
+ * 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for
+ * sendmsg(). This structure and SCTP_RCVINFO replaces
+ * SCTP_SNDRCV which has been deprecated.
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_SNDINFO struct sctp_sndinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndinfo)))
+ return -EINVAL;
+
+ cmsgs->sinfo = CMSG_DATA(cmsg);
+
+ if (cmsgs->sinfo->snd_flags &
+ ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_ABORT | SCTP_EOF))
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
}
+
return 0;
}
@@ -6518,8 +6739,8 @@ out:
* Note: This is pretty much the same routine as in core/datagram.c
* with a few changes to make lksctp work.
*/
-static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
- int noblock, int *err)
+struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
+ int noblock, int *err)
{
int error;
struct sk_buff *skb;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 12c7e01c2677..2e9ada10fd84 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -424,8 +424,9 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- pr_warn_once("Changing rto_alpha or rto_beta may lead to "
- "suboptimal rtt/srtt estimations!\n");
+ if (write)
+ pr_warn_once("Changing rto_alpha or rto_beta may lead to "
+ "suboptimal rtt/srtt estimations!\n");
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 7dd672fa651f..a0a431824f63 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -289,8 +289,8 @@ void sctp_transport_route(struct sctp_transport *transport,
*/
if (asoc && (!asoc->peer.primary_path ||
(transport == asoc->peer.active_path)))
- opt->pf->af->to_sk_saddr(&transport->saddr,
- asoc->base.sk);
+ opt->pf->to_sk_saddr(&transport->saddr,
+ asoc->base.sk);
} else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
@@ -594,15 +594,16 @@ void sctp_transport_burst_reset(struct sctp_transport *t)
}
/* What is the next timeout value for this transport? */
-unsigned long sctp_transport_timeout(struct sctp_transport *t)
+unsigned long sctp_transport_timeout(struct sctp_transport *trans)
{
- unsigned long timeout;
- timeout = t->rto + sctp_jitter(t->rto);
- if ((t->state != SCTP_UNCONFIRMED) &&
- (t->state != SCTP_PF))
- timeout += t->hbinterval;
- timeout += jiffies;
- return timeout;
+ /* RTO + timer slack +/- 50% of RTO */
+ unsigned long timeout = (trans->rto >> 1) + prandom_u32_max(trans->rto);
+
+ if (trans->state != SCTP_UNCONFIRMED &&
+ trans->state != SCTP_PF)
+ timeout += trans->hbinterval;
+
+ return timeout + jiffies;
}
/* Reset transport variables to their initial values */
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index b6842fdb53d4..d1e38308f615 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -341,7 +341,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
memcpy(&spc->spc_aaddr, aaddr, sizeof(struct sockaddr_storage));
/* Map ipv4 address into v4-mapped-on-v6 address. */
- sctp_get_pf_specific(asoc->base.sk->sk_family)->addr_v4map(
+ sctp_get_pf_specific(asoc->base.sk->sk_family)->addr_to_user(
sctp_sk(asoc->base.sk),
(union sctp_addr *)&spc->spc_aaddr);
@@ -886,6 +886,69 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
sizeof(sinfo), &sinfo);
}
+/* RFC6458, Section 5.3.5 SCTP Receive Information Structure
+ * (SCTP_SNDRCV)
+ */
+void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
+ struct msghdr *msghdr)
+{
+ struct sctp_rcvinfo rinfo;
+
+ if (sctp_ulpevent_is_notification(event))
+ return;
+
+ memset(&rinfo, 0, sizeof(struct sctp_rcvinfo));
+ rinfo.rcv_sid = event->stream;
+ rinfo.rcv_ssn = event->ssn;
+ rinfo.rcv_ppid = event->ppid;
+ rinfo.rcv_flags = event->flags;
+ rinfo.rcv_tsn = event->tsn;
+ rinfo.rcv_cumtsn = event->cumtsn;
+ rinfo.rcv_assoc_id = sctp_assoc2id(event->asoc);
+ rinfo.rcv_context = event->asoc->default_rcv_context;
+
+ put_cmsg(msghdr, IPPROTO_SCTP, SCTP_RCVINFO,
+ sizeof(rinfo), &rinfo);
+}
+
+/* RFC6458, Section 5.3.6. SCTP Next Receive Information Structure
+ * (SCTP_NXTINFO)
+ */
+static void __sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
+ struct msghdr *msghdr,
+ const struct sk_buff *skb)
+{
+ struct sctp_nxtinfo nxtinfo;
+
+ memset(&nxtinfo, 0, sizeof(nxtinfo));
+ nxtinfo.nxt_sid = event->stream;
+ nxtinfo.nxt_ppid = event->ppid;
+ nxtinfo.nxt_flags = event->flags;
+ if (sctp_ulpevent_is_notification(event))
+ nxtinfo.nxt_flags |= SCTP_NOTIFICATION;
+ nxtinfo.nxt_length = skb->len;
+ nxtinfo.nxt_assoc_id = sctp_assoc2id(event->asoc);
+
+ put_cmsg(msghdr, IPPROTO_SCTP, SCTP_NXTINFO,
+ sizeof(nxtinfo), &nxtinfo);
+}
+
+void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
+ struct msghdr *msghdr,
+ struct sock *sk)
+{
+ struct sk_buff *skb;
+ int err;
+
+ skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err);
+ if (skb != NULL) {
+ __sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb),
+ msghdr, skb);
+ /* Just release refcount here. */
+ kfree_skb(skb);
+ }
+}
+
/* Do accounting for bytes received and hold a reference to the association
* for each skb.
*/
diff --git a/net/socket.c b/net/socket.c
index abf56b2a14f9..fe20c319a0bb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,6 +106,7 @@
#include <linux/sockios.h>
#include <linux/atalk.h>
#include <net/busy_poll.h>
+#include <linux/errqueue.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
@@ -609,17 +610,25 @@ void sock_release(struct socket *sock)
}
EXPORT_SYMBOL(sock_release);
-void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
+void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
{
- *tx_flags = 0;
- if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
- *tx_flags |= SKBTX_HW_TSTAMP;
- if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
- *tx_flags |= SKBTX_SW_TSTAMP;
- if (sock_flag(sk, SOCK_WIFI_STATUS))
- *tx_flags |= SKBTX_WIFI_STATUS;
+ u8 flags = *tx_flags;
+
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
+ flags |= SKBTX_HW_TSTAMP;
+
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
+ flags |= SKBTX_SW_TSTAMP;
+
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
+ flags |= SKBTX_SCHED_TSTAMP;
+
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
+ flags |= SKBTX_ACK_TSTAMP;
+
+ *tx_flags = flags;
}
-EXPORT_SYMBOL(sock_tx_timestamp);
+EXPORT_SYMBOL(__sock_tx_timestamp);
static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size)
@@ -697,7 +706,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
- struct timespec ts[3];
+ struct scm_timestamping tss;
int empty = 1;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
@@ -714,28 +723,24 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
sizeof(tv), &tv);
} else {
- skb_get_timestampns(skb, &ts[0]);
+ struct timespec ts;
+ skb_get_timestampns(skb, &ts);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
- sizeof(ts[0]), &ts[0]);
+ sizeof(ts), &ts);
}
}
-
- memset(ts, 0, sizeof(ts));
- if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
- ktime_to_timespec_cond(skb->tstamp, ts + 0))
+ memset(&tss, 0, sizeof(tss));
+ if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
+ empty = 0;
+ if (shhwtstamps &&
+ (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
empty = 0;
- if (shhwtstamps) {
- if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
- empty = 0;
- if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
- empty = 0;
- }
if (!empty)
put_cmsg(msg, SOL_SOCKET,
- SCM_TIMESTAMPING, sizeof(ts), &ts);
+ SCM_TIMESTAMPING, sizeof(tss), &tss);
}
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
@@ -1060,7 +1065,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
err = -EFAULT;
if (get_user(pid, (int __user *)argp))
break;
- err = f_setown(sock->file, pid, 1);
+ f_setown(sock->file, pid, 1);
+ err = 0;
break;
case FIOGETOWN:
case SIOCGPGRP:
@@ -1988,6 +1994,9 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
return -EFAULT;
+ if (kmsg->msg_name == NULL)
+ kmsg->msg_namelen = 0;
+
if (kmsg->msg_namelen < 0)
return -EINVAL;
@@ -2593,7 +2602,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
*
* This function is called by a protocol handler that wants to
* advertise its address family, and have it linked into the
- * socket interface. The value ops->family coresponds to the
+ * socket interface. The value ops->family corresponds to the
* socket system call protocol family.
*/
int sock_register(const struct net_proto_family *ops)
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index a622ad64acd8..2e0a6f92e563 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -176,7 +176,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
len = (buf + buflen) - delim - 1;
p = kstrndup(delim + 1, len, GFP_KERNEL);
if (p) {
- unsigned long scope_id = 0;
+ u32 scope_id = 0;
struct net_device *dev;
dev = dev_get_by_name(net, p);
@@ -184,7 +184,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
scope_id = dev->ifindex;
dev_put(dev);
} else {
- if (strict_strtoul(p, 10, &scope_id) == 0) {
+ if (kstrtou32(p, 10, &scope_id) == 0) {
kfree(p);
return 0;
}
@@ -304,7 +304,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
* @sap: buffer into which to plant socket address
* @salen: size of buffer
*
- * @uaddr does not have to be '\0'-terminated, but strict_strtoul() and
+ * @uaddr does not have to be '\0'-terminated, but kstrtou8() and
* rpc_pton() require proper string termination to be successful.
*
* Returns the size of the socket address if successful; otherwise
@@ -315,7 +315,7 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
const size_t salen)
{
char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')];
- unsigned long portlo, porthi;
+ u8 portlo, porthi;
unsigned short port;
if (uaddr_len > RPCBIND_MAXUADDRLEN)
@@ -327,18 +327,14 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
c = strrchr(buf, '.');
if (unlikely(c == NULL))
return 0;
- if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0))
- return 0;
- if (unlikely(portlo > 255))
+ if (unlikely(kstrtou8(c + 1, 10, &portlo) != 0))
return 0;
*c = '\0';
c = strrchr(buf, '.');
if (unlikely(c == NULL))
return 0;
- if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0))
- return 0;
- if (unlikely(porthi > 255))
+ if (unlikely(kstrtou8(c + 1, 10, &porthi) != 0))
return 0;
port = (unsigned short)((porthi << 8) | portlo);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f77366717420..383eb919ac0b 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -48,7 +48,7 @@ static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
if (!val)
goto out_inval;
- ret = strict_strtoul(val, 0, &num);
+ ret = kstrtoul(val, 0, &num);
if (ret == -EINVAL)
goto out_inval;
nbits = fls(num);
@@ -80,6 +80,10 @@ static struct kernel_param_ops param_ops_hashtbl_sz = {
module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
+static unsigned long auth_max_cred_cachesize = ULONG_MAX;
+module_param(auth_max_cred_cachesize, ulong, 0644);
+MODULE_PARM_DESC(auth_max_cred_cachesize, "RPC credential maximum total cache size");
+
static u32
pseudoflavor_to_flavor(u32 flavor) {
if (flavor > RPC_AUTH_MAXFLAVOR)
@@ -363,6 +367,15 @@ rpcauth_cred_key_to_expire(struct rpc_cred *cred)
}
EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
+char *
+rpcauth_stringify_acceptor(struct rpc_cred *cred)
+{
+ if (!cred->cr_ops->crstringify_acceptor)
+ return NULL;
+ return cred->cr_ops->crstringify_acceptor(cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor);
+
/*
* Destroy a list of credentials
*/
@@ -472,6 +485,20 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
return freed;
}
+static unsigned long
+rpcauth_cache_do_shrink(int nr_to_scan)
+{
+ LIST_HEAD(free);
+ unsigned long freed;
+
+ spin_lock(&rpc_credcache_lock);
+ freed = rpcauth_prune_expired(&free, nr_to_scan);
+ spin_unlock(&rpc_credcache_lock);
+ rpcauth_destroy_credlist(&free);
+
+ return freed;
+}
+
/*
* Run memory cache shrinker.
*/
@@ -479,9 +506,6 @@ static unsigned long
rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- LIST_HEAD(free);
- unsigned long freed;
-
if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return SHRINK_STOP;
@@ -489,12 +513,7 @@ rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
if (list_empty(&cred_unused))
return SHRINK_STOP;
- spin_lock(&rpc_credcache_lock);
- freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
- spin_unlock(&rpc_credcache_lock);
- rpcauth_destroy_credlist(&free);
-
- return freed;
+ return rpcauth_cache_do_shrink(sc->nr_to_scan);
}
static unsigned long
@@ -504,6 +523,21 @@ rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
}
+static void
+rpcauth_cache_enforce_limit(void)
+{
+ unsigned long diff;
+ unsigned int nr_to_scan;
+
+ if (number_cred_unused <= auth_max_cred_cachesize)
+ return;
+ diff = number_cred_unused - auth_max_cred_cachesize;
+ nr_to_scan = 100;
+ if (diff < nr_to_scan)
+ nr_to_scan = diff;
+ rpcauth_cache_do_shrink(nr_to_scan);
+}
+
/*
* Look up a process' credentials in the authentication cache
*/
@@ -523,6 +557,12 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
+ if (flags & RPCAUTH_LOOKUP_RCU) {
+ if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
+ !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
+ cred = entry;
+ break;
+ }
spin_lock(&cache->lock);
if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
spin_unlock(&cache->lock);
@@ -537,6 +577,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
if (cred != NULL)
goto found;
+ if (flags & RPCAUTH_LOOKUP_RCU)
+ return ERR_PTR(-ECHILD);
+
new = auth->au_ops->crcreate(auth, acred, flags);
if (IS_ERR(new)) {
cred = new;
@@ -557,6 +600,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
} else
list_add_tail(&new->cr_lru, &free);
spin_unlock(&cache->lock);
+ rpcauth_cache_enforce_limit();
found:
if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
cred->cr_ops->cr_init != NULL &&
@@ -586,10 +630,8 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
memset(&acred, 0, sizeof(acred));
acred.uid = cred->fsuid;
acred.gid = cred->fsgid;
- acred.group_info = get_group_info(((struct cred *)cred)->group_info);
-
+ acred.group_info = cred->group_info;
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
- put_group_info(acred.group_info);
return ret;
}
EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index ed04869b2d4f..6f6b829c9e8e 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -38,6 +38,12 @@ struct rpc_cred *rpc_lookup_cred(void)
}
EXPORT_SYMBOL_GPL(rpc_lookup_cred);
+struct rpc_cred *rpc_lookup_cred_nonblock(void)
+{
+ return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
+}
+EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
+
/*
* Public call interface for looking up machine creds.
*/
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index b6e440baccc3..afb292cd797d 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -183,8 +183,9 @@ gss_cred_get_ctx(struct rpc_cred *cred)
struct gss_cl_ctx *ctx = NULL;
rcu_read_lock();
- if (gss_cred->gc_ctx)
- ctx = gss_get_ctx(gss_cred->gc_ctx);
+ ctx = rcu_dereference(gss_cred->gc_ctx);
+ if (ctx)
+ gss_get_ctx(ctx);
rcu_read_unlock();
return ctx;
}
@@ -262,9 +263,22 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
p = ERR_PTR(ret);
goto err;
}
- dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u\n",
- __func__, ctx->gc_expiry, now, timeout);
- return q;
+
+ /* is there any trailing data? */
+ if (q == end) {
+ p = q;
+ goto done;
+ }
+
+ /* pull in acceptor name (if there is one) */
+ p = simple_get_netobj(q, end, &ctx->gc_acceptor);
+ if (IS_ERR(p))
+ goto err;
+done:
+ dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
+ __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
+ ctx->gc_acceptor.data);
+ return p;
err:
dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p));
return p;
@@ -1194,13 +1208,13 @@ gss_destroying_context(struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
+ struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
struct rpc_task *task;
- if (gss_cred->gc_ctx == NULL ||
- test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
+ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
return 0;
- gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY;
+ ctx->gc_proc = RPC_GSS_PROC_DESTROY;
cred->cr_ops = &gss_nullops;
/* Take a reference to ensure the cred will be destroyed either
@@ -1225,6 +1239,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data);
+ kfree(ctx->gc_acceptor.data);
kfree(ctx);
}
@@ -1260,7 +1275,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
- struct gss_cl_ctx *ctx = gss_cred->gc_ctx;
+ struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
@@ -1332,6 +1347,36 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
return err;
}
+static char *
+gss_stringify_acceptor(struct rpc_cred *cred)
+{
+ char *string = NULL;
+ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+ struct gss_cl_ctx *ctx;
+ struct xdr_netobj *acceptor;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(gss_cred->gc_ctx);
+ if (!ctx)
+ goto out;
+
+ acceptor = &ctx->gc_acceptor;
+
+ /* no point if there's no string */
+ if (!acceptor->len)
+ goto out;
+
+ string = kmalloc(acceptor->len + 1, GFP_KERNEL);
+ if (!string)
+ goto out;
+
+ memcpy(string, acceptor->data, acceptor->len);
+ string[acceptor->len] = '\0';
+out:
+ rcu_read_unlock();
+ return string;
+}
+
/*
* Returns -EACCES if GSS context is NULL or will expire within the
* timeout (miliseconds)
@@ -1340,15 +1385,16 @@ static int
gss_key_timeout(struct rpc_cred *rc)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+ struct gss_cl_ctx *ctx;
unsigned long now = jiffies;
unsigned long expire;
- if (gss_cred->gc_ctx == NULL)
- return -EACCES;
-
- expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ);
-
- if (time_after(now, expire))
+ rcu_read_lock();
+ ctx = rcu_dereference(gss_cred->gc_ctx);
+ if (ctx)
+ expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ);
+ rcu_read_unlock();
+ if (!ctx || time_after(now, expire))
return -EACCES;
return 0;
}
@@ -1357,13 +1403,19 @@ static int
gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+ struct gss_cl_ctx *ctx;
int ret;
if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
goto out;
/* Don't match with creds that have expired. */
- if (time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
+ rcu_read_lock();
+ ctx = rcu_dereference(gss_cred->gc_ctx);
+ if (!ctx || time_after(jiffies, ctx->gc_expiry)) {
+ rcu_read_unlock();
return 0;
+ }
+ rcu_read_unlock();
if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags))
return 0;
out:
@@ -1909,29 +1961,31 @@ static const struct rpc_authops authgss_ops = {
};
static const struct rpc_credops gss_credops = {
- .cr_name = "AUTH_GSS",
- .crdestroy = gss_destroy_cred,
- .cr_init = gss_cred_init,
- .crbind = rpcauth_generic_bind_cred,
- .crmatch = gss_match,
- .crmarshal = gss_marshal,
- .crrefresh = gss_refresh,
- .crvalidate = gss_validate,
- .crwrap_req = gss_wrap_req,
- .crunwrap_resp = gss_unwrap_resp,
- .crkey_timeout = gss_key_timeout,
+ .cr_name = "AUTH_GSS",
+ .crdestroy = gss_destroy_cred,
+ .cr_init = gss_cred_init,
+ .crbind = rpcauth_generic_bind_cred,
+ .crmatch = gss_match,
+ .crmarshal = gss_marshal,
+ .crrefresh = gss_refresh,
+ .crvalidate = gss_validate,
+ .crwrap_req = gss_wrap_req,
+ .crunwrap_resp = gss_unwrap_resp,
+ .crkey_timeout = gss_key_timeout,
+ .crstringify_acceptor = gss_stringify_acceptor,
};
static const struct rpc_credops gss_nullops = {
- .cr_name = "AUTH_GSS",
- .crdestroy = gss_destroy_nullcred,
- .crbind = rpcauth_generic_bind_cred,
- .crmatch = gss_match,
- .crmarshal = gss_marshal,
- .crrefresh = gss_refresh_null,
- .crvalidate = gss_validate,
- .crwrap_req = gss_wrap_req,
- .crunwrap_resp = gss_unwrap_resp,
+ .cr_name = "AUTH_GSS",
+ .crdestroy = gss_destroy_nullcred,
+ .crbind = rpcauth_generic_bind_cred,
+ .crmatch = gss_match,
+ .crmarshal = gss_marshal,
+ .crrefresh = gss_refresh_null,
+ .crvalidate = gss_validate,
+ .crwrap_req = gss_wrap_req,
+ .crunwrap_resp = gss_unwrap_resp,
+ .crstringify_acceptor = gss_stringify_acceptor,
};
static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 0f43e894bc0a..f5ed9f6ece06 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -641,7 +641,7 @@ out:
u32
gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
- struct xdr_buf *buf, int ec, struct page **pages)
+ struct xdr_buf *buf, struct page **pages)
{
u32 err;
struct xdr_netobj hmac;
@@ -684,13 +684,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
ecptr = buf->tail[0].iov_base;
}
- memset(ecptr, 'X', ec);
- buf->tail[0].iov_len += ec;
- buf->len += ec;
-
/* copy plaintext gss token header after filler (if any) */
- memcpy(ecptr + ec, buf->head[0].iov_base + offset,
- GSS_KRB5_TOK_HDR_LEN);
+ memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
buf->len += GSS_KRB5_TOK_HDR_LEN;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 62ae3273186c..42768e5c3994 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,31 +70,37 @@
DEFINE_SPINLOCK(krb5_seq_lock);
-static char *
+static void *
setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
- __be16 *ptr, *krb5_hdr;
+ u16 *ptr;
+ void *krb5_hdr;
int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
token->len = g_token_size(&ctx->mech_used, body_size);
- ptr = (__be16 *)token->data;
+ ptr = (u16 *)token->data;
g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
/* ptr now at start of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr;
*ptr++ = KG_TOK_MIC_MSG;
- *ptr++ = cpu_to_le16(ctx->gk5e->signalg);
+ /*
+ * signalg is stored as if it were converted from LE to host endian, even
+ * though it's an opaque pair of bytes according to the RFC.
+ */
+ *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
*ptr++ = SEAL_ALG_NONE;
- *ptr++ = 0xffff;
+ *ptr = 0xffff;
- return (char *)krb5_hdr;
+ return krb5_hdr;
}
static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
- __be16 *ptr, *krb5_hdr;
+ u16 *ptr;
+ void *krb5_hdr;
u8 *p, flags = 0x00;
if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
@@ -104,15 +110,15 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
/* Per rfc 4121, sec 4.2.6.1, there is no header,
* just start the token */
- krb5_hdr = ptr = (__be16 *)token->data;
+ krb5_hdr = ptr = (u16 *)token->data;
*ptr++ = KG2_TOK_MIC;
p = (u8 *)ptr;
*p++ = flags;
*p++ = 0xff;
- ptr = (__be16 *)p;
- *ptr++ = 0xffff;
+ ptr = (u16 *)p;
*ptr++ = 0xffff;
+ *ptr = 0xffff;
token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
return krb5_hdr;
@@ -181,7 +187,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock);
- *((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+ *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
if (ctx->initiate) {
cksumkey = ctx->initiator_sign;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 42560e55d978..4b614c604fe0 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -201,9 +201,15 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
- *(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
- memset(ptr + 4, 0xff, 4);
- *(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
+ /*
+ * signalg and sealalg are stored as if they were converted from LE
+ * to host endian, even though they're opaque pairs of bytes according
+ * to the RFC.
+ */
+ *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
+ *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
+ ptr[6] = 0xff;
+ ptr[7] = 0xff;
gss_krb5_make_confounder(msg_start, conflen);
@@ -438,7 +444,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
u8 *ptr, *plainhdr;
s32 now;
u8 flags = 0x00;
- __be16 *be16ptr, ec = 0;
+ __be16 *be16ptr;
__be64 *be64ptr;
u32 err;
@@ -468,16 +474,16 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
be16ptr = (__be16 *)ptr;
blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
- *be16ptr++ = cpu_to_be16(ec);
+ *be16ptr++ = 0;
/* "inner" token header always uses 0 for RRC */
- *be16ptr++ = cpu_to_be16(0);
+ *be16ptr++ = 0;
be64ptr = (__be64 *)be16ptr;
spin_lock(&krb5_seq_lock);
*be64ptr = cpu_to_be64(kctx->seq_send64++);
spin_unlock(&krb5_seq_lock);
- err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages);
+ err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
if (err)
return err;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 4ce5eccec1f6..c548ab213f76 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
u32 priv_len, maj_stat;
int pad, saved_len, remaining_len, offset;
- rqstp->rq_splice_ok = 0;
+ rqstp->rq_splice_ok = false;
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index f0ebe07978a2..712c123e04e9 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -35,6 +35,8 @@ nul_destroy(struct rpc_auth *auth)
static struct rpc_cred *
nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
+ if (flags & RPCAUTH_LOOKUP_RCU)
+ return &null_cred;
return get_rpccred(&null_cred);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2e6ab10734f6..9acd6ce88db7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -461,6 +461,8 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
+ if (args->flags & RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT)
+ clnt->cl_noretranstimeo = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
clnt->cl_discrtry = 1;
if (!(args->flags & RPC_CLNT_CREATE_QUIET))
@@ -579,6 +581,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_softrtry = clnt->cl_softrtry;
+ new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
return new;
@@ -1746,6 +1749,7 @@ call_bind_status(struct rpc_task *task)
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
+ case -ENOBUFS:
case -EPIPE:
dprintk("RPC: %5u remote rpcbind unreachable: %d\n",
task->tk_pid, task->tk_status);
@@ -1812,6 +1816,8 @@ call_connect_status(struct rpc_task *task)
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -ENOBUFS:
+ case -EPIPE:
if (RPC_IS_SOFTCONN(task))
break;
/* retry with existing socket, after a delay */
@@ -1910,6 +1916,7 @@ call_transmit_status(struct rpc_task *task)
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
+ case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
xprt_end_transmit(task);
rpc_exit(task, task->tk_status);
@@ -1918,6 +1925,7 @@ call_transmit_status(struct rpc_task *task)
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
+ case -ENOBUFS:
case -EPIPE:
rpc_task_force_reencode(task);
}
@@ -2014,6 +2022,7 @@ call_status(struct rpc_task *task)
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
+ case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
rpc_exit(task, status);
break;
@@ -2034,6 +2043,7 @@ call_status(struct rpc_task *task)
case -ECONNRESET:
case -ECONNABORTED:
rpc_force_rebind(clnt);
+ case -ENOBUFS:
rpc_delay(task, 3*HZ);
case -EPIPE:
case -ENOTCONN:
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index b18554898562..2d12b76b5a64 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -195,7 +195,7 @@ static struct inode *
rpc_alloc_inode(struct super_block *sb)
{
struct rpc_inode *rpci;
- rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
+ rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
if (!rpci)
return NULL;
return &rpci->vfs_inode;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c0365c14b858..fe3441abdbe5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -250,7 +250,7 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
}
EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
-static int rpc_wait_bit_killable(void *word)
+static int rpc_wait_bit_killable(struct wait_bit_key *key)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
@@ -309,7 +309,7 @@ static int rpc_complete_task(struct rpc_task *task)
* to enforce taking of the wq->lock and hence avoid races with
* rpc_complete_task().
*/
-int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
+int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
{
if (action == NULL)
action = rpc_wait_bit_killable;
@@ -821,9 +821,7 @@ void rpc_execute(struct rpc_task *task)
static void rpc_async_schedule(struct work_struct *work)
{
- current->flags |= PF_FSTRANS;
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
- current->flags &= ~PF_FSTRANS;
}
/**
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5de6801cd924..ca8a7958f4e6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
goto out_enomem;
- init_waitqueue_head(&rqstp->rq_wait);
-
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
@@ -1086,9 +1084,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off only in gss privacy case: */
- rqstp->rq_splice_ok = 1;
+ rqstp->rq_splice_ok = true;
/* Will be turned off only when NFSv4 Sessions are used */
- rqstp->rq_usedeferral = 1;
+ rqstp->rq_usedeferral = true;
rqstp->rq_dropme = false;
/* Setup reply header */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b4737fbdec13..c179ca2a5aa4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -23,6 +23,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static void svc_age_temp_xprts(unsigned long closure);
static void svc_delete_xprt(struct svc_xprt *xprt);
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
@@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
return;
/* As soon as we clear busy, the xprt could be closed and
- * 'put', so we need a reference to call svc_xprt_enqueue with:
+ * 'put', so we need a reference to call svc_xprt_do_enqueue with:
*/
svc_xprt_get(xprt);
+ smp_mb__before_atomic();
clear_bit(XPT_BUSY, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
+ svc_xprt_do_enqueue(xprt);
svc_xprt_put(xprt);
}
@@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
return false;
}
-/*
- * Queue up a transport with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
struct svc_rqst *rqstp;
@@ -349,20 +346,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
if (!svc_xprt_has_something_to_do(xprt))
return;
- cpu = get_cpu();
- pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
- put_cpu();
-
- spin_lock_bh(&pool->sp_lock);
-
- if (!list_empty(&pool->sp_threads) &&
- !list_empty(&pool->sp_sockets))
- printk(KERN_ERR
- "svc_xprt_enqueue: "
- "threads and transports both waiting??\n");
-
- pool->sp_stats.packets++;
-
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue
@@ -371,9 +354,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
/* Don't enqueue transport while already enqueued */
dprintk("svc: transport %p busy, not enqueued\n", xprt);
- goto out_unlock;
+ return;
}
+ cpu = get_cpu();
+ pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
+ spin_lock_bh(&pool->sp_lock);
+
+ pool->sp_stats.packets++;
+
if (!list_empty(&pool->sp_threads)) {
rqstp = list_entry(pool->sp_threads.next,
struct svc_rqst,
@@ -385,18 +374,35 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
printk(KERN_ERR
"svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
rqstp, rqstp->rq_xprt);
- rqstp->rq_xprt = xprt;
+ /* Note the order of the following 3 lines:
+ * We want to assign xprt to rqstp->rq_xprt only _after_
+ * we've woken up the process, so that we don't race with
+ * the lockless check in svc_get_next_xprt().
+ */
svc_xprt_get(xprt);
+ wake_up_process(rqstp->rq_task);
+ rqstp->rq_xprt = xprt;
pool->sp_stats.threads_woken++;
- wake_up(&rqstp->rq_wait);
} else {
dprintk("svc: transport %p put into queue\n", xprt);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
}
-out_unlock:
spin_unlock_bh(&pool->sp_lock);
+ put_cpu();
+}
+
+/*
+ * Queue up a transport with data pending. If there are idle nfsd
+ * processes, wake 'em up.
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
+{
+ if (test_bit(XPT_BUSY, &xprt->xpt_flags))
+ return;
+ svc_xprt_do_enqueue(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -439,6 +445,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
+ if (xprt->xpt_ops->xpo_adjust_wspace)
+ xprt->xpt_ops->xpo_adjust_wspace(xprt);
svc_xprt_enqueue(xprt);
}
}
@@ -498,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv)
svc_thread_dequeue(pool, rqstp);
rqstp->rq_xprt = NULL;
*/
- wake_up(&rqstp->rq_wait);
+ wake_up_process(rqstp->rq_task);
} else
pool->sp_task_pending = 1;
spin_unlock_bh(&pool->sp_lock);
@@ -617,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
- DECLARE_WAITQUEUE(wait, current);
- long time_left;
+ long time_left = 0;
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
@@ -640,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
} else {
if (pool->sp_task_pending) {
pool->sp_task_pending = 0;
- spin_unlock_bh(&pool->sp_lock);
- return ERR_PTR(-EAGAIN);
+ xprt = ERR_PTR(-EAGAIN);
+ goto out;
}
- /* No data pending. Go to sleep */
- svc_thread_enqueue(pool, rqstp);
-
/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
- /*
- * checking kthread_should_stop() here allows us to avoid
- * locking and signalling when stopping kthreads that call
- * svc_recv. If the thread has already been woken up, then
- * we can exit here without sleeping. If not, then it
- * it'll be woken up quickly during the schedule_timeout
- */
- if (kthread_should_stop()) {
- set_current_state(TASK_RUNNING);
- spin_unlock_bh(&pool->sp_lock);
- return ERR_PTR(-EINTR);
- }
-
- add_wait_queue(&rqstp->rq_wait, &wait);
+ /* No data pending. Go to sleep */
+ svc_thread_enqueue(pool, rqstp);
spin_unlock_bh(&pool->sp_lock);
- time_left = schedule_timeout(timeout);
+ if (!(signalled() || kthread_should_stop())) {
+ time_left = schedule_timeout(timeout);
+ __set_current_state(TASK_RUNNING);
+
+ try_to_freeze();
- try_to_freeze();
+ xprt = rqstp->rq_xprt;
+ if (xprt != NULL)
+ return xprt;
+ } else
+ __set_current_state(TASK_RUNNING);
spin_lock_bh(&pool->sp_lock);
- remove_wait_queue(&rqstp->rq_wait, &wait);
if (!time_left)
pool->sp_stats.threads_timedout++;
@@ -688,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
return ERR_PTR(-EAGAIN);
}
}
+out:
spin_unlock_bh(&pool->sp_lock);
return xprt;
}
@@ -733,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
- } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
+ } else {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
@@ -770,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
printk(KERN_ERR
"svc_recv: service %p, transport not NULL!\n",
rqstp);
- if (waitqueue_active(&rqstp->rq_wait))
- printk(KERN_ERR
- "svc_recv: service %p, wait queue active!\n",
- rqstp);
err = svc_alloc_arg(rqstp);
if (err)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b507cd327d9b..3f959c681885 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -312,19 +312,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
}
/*
- * Check input queue length
- */
-static int svc_recv_available(struct svc_sock *svsk)
-{
- struct socket *sock = svsk->sk_sock;
- int avail, err;
-
- err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
-
- return (err >= 0)? avail : err;
-}
-
-/*
* Generic recvfrom routine.
*/
static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
@@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
rqstp->rq_xprt_hlen = 0;
+ clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
msg.msg_flags);
+ /* If we read a full record, then assume there may be more
+ * data to read (stream based sockets only!)
+ */
+ if (len == buflen)
+ set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
svsk, iov[0].iov_base, iov[0].iov_len, len);
@@ -446,15 +439,43 @@ static void svc_write_space(struct sock *sk)
}
}
+static int svc_tcp_has_wspace(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct svc_serv *serv = svsk->sk_xprt.xpt_server;
+ int required;
+
+ if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
+ return 1;
+ required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
+ if (sk_stream_wspace(svsk->sk_sk) >= required ||
+ (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
+ atomic_read(&xprt->xpt_reserved) == 0))
+ return 1;
+ set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ return 0;
+}
+
static void svc_tcp_write_space(struct sock *sk)
{
+ struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
struct socket *sock = sk->sk_socket;
- if (sk_stream_is_writeable(sk) && sock)
+ if (!sk_stream_is_writeable(sk) || !sock)
+ return;
+ if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
clear_bit(SOCK_NOSPACE, &sock->flags);
svc_write_space(sk);
}
+static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+ if (svc_tcp_has_wspace(xprt))
+ clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+}
+
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
@@ -692,6 +713,7 @@ static struct svc_xprt_class svc_udp_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_udp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
+ .xcl_ident = XPRT_TRANSPORT_UDP,
};
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -951,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
unsigned int want;
int len;
- clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
struct kvec iov;
@@ -1007,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
"%s: Got unrecognized reply: "
"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
__func__, ntohl(calldir),
- bc_xprt, xid);
+ bc_xprt, ntohl(xid));
return -EAGAIN;
}
@@ -1044,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
/* If we have more data, signal svc_xprt_enqueue() to try again */
- if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: TCP %s record (%d bytes)\n",
svc_sock_final_rec(svsk) ? "final" : "nonfinal",
svc_sock_reclen(svsk));
@@ -1197,23 +1215,6 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
svc_putnl(resv, 0);
}
-static int svc_tcp_has_wspace(struct svc_xprt *xprt)
-{
- struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- int required;
-
- if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
- return 1;
- required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
- if (sk_stream_wspace(svsk->sk_sk) >= required ||
- (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
- atomic_read(&xprt->xpt_reserved) == 0))
- return 1;
- set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- return 0;
-}
-
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -1285,6 +1286,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
+ .xpo_adjust_wspace = svc_tcp_adjust_wspace,
};
static struct svc_xprt_class svc_tcp_class = {
@@ -1292,6 +1294,7 @@ static struct svc_xprt_class svc_tcp_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_tcp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+ .xcl_ident = XPRT_TRANSPORT_TCP,
};
void svc_init_xprt_sock(void)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 23fb4e75e245..290af97bf6f9 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -509,7 +509,8 @@ void xdr_commit_encode(struct xdr_stream *xdr)
}
EXPORT_SYMBOL_GPL(xdr_commit_encode);
-__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ size_t nbytes)
{
static __be32 *p;
int space_left;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c3b2b3369e52..56e4e150e80e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -744,6 +744,7 @@ static void xprt_connect_status(struct rpc_task *task)
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -EPIPE:
case -EAGAIN:
dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break;
@@ -1306,7 +1307,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
}
}
spin_unlock(&xprt_list_lock);
- printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+ dprintk("RPC: transport (%d) not supported\n", args->ident);
return ERR_PTR(-EIO);
found:
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 693966d3f33b..6166c985fe24 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,14 +53,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-enum rpcrdma_chunktype {
- rpcrdma_noch = 0,
- rpcrdma_readch,
- rpcrdma_areadch,
- rpcrdma_writech,
- rpcrdma_replych
-};
-
#ifdef RPC_DEBUG
static const char transfertypes[][12] = {
"pure inline", /* no chunks */
@@ -279,13 +271,37 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
return (unsigned char *)iptr - (unsigned char *)headerp;
out:
- for (pos = 0; nchunks--;)
- pos += rpcrdma_deregister_external(
- &req->rl_segments[pos], r_xprt);
+ if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
+ for (pos = 0; nchunks--;)
+ pos += rpcrdma_deregister_external(
+ &req->rl_segments[pos], r_xprt);
+ }
return n;
}
/*
+ * Marshal chunks. This routine returns the header length
+ * consumed by marshaling.
+ *
+ * Returns positive RPC/RDMA header size, or negative errno.
+ */
+
+ssize_t
+rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
+{
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base;
+
+ if (req->rl_rtype != rpcrdma_noch)
+ result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
+ headerp, req->rl_rtype);
+ else if (req->rl_wtype != rpcrdma_noch)
+ result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
+ headerp, req->rl_wtype);
+ return result;
+}
+
+/*
* Copy write data inline.
* This function is used for "small" requests. Data which is passed
* to RPC via iovecs (or page list) is copied directly into the
@@ -377,7 +393,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
char *base;
size_t rpclen, padlen;
ssize_t hdrlen;
- enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
/*
@@ -415,13 +430,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* into pages; otherwise use reply chunks.
*/
if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
- wtype = rpcrdma_noch;
+ req->rl_wtype = rpcrdma_noch;
else if (rqst->rq_rcv_buf.page_len == 0)
- wtype = rpcrdma_replych;
+ req->rl_wtype = rpcrdma_replych;
else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
- wtype = rpcrdma_writech;
+ req->rl_wtype = rpcrdma_writech;
else
- wtype = rpcrdma_replych;
+ req->rl_wtype = rpcrdma_replych;
/*
* Chunks needed for arguments?
@@ -438,16 +453,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* TBD check NFSv4 setacl
*/
if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
- rtype = rpcrdma_noch;
+ req->rl_rtype = rpcrdma_noch;
else if (rqst->rq_snd_buf.page_len == 0)
- rtype = rpcrdma_areadch;
+ req->rl_rtype = rpcrdma_areadch;
else
- rtype = rpcrdma_readch;
+ req->rl_rtype = rpcrdma_readch;
/* The following simplification is not true forever */
- if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
- wtype = rpcrdma_noch;
- if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
+ if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
+ req->rl_wtype = rpcrdma_noch;
+ if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
__func__);
return -EIO;
@@ -461,7 +476,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* When padding is in use and applies to the transfer, insert
* it and change the message type.
*/
- if (rtype == rpcrdma_noch) {
+ if (req->rl_rtype == rpcrdma_noch) {
padlen = rpcrdma_inline_pullup(rqst,
RPCRDMA_INLINE_PAD_VALUE(rqst));
@@ -476,7 +491,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
- if (wtype != rpcrdma_noch) {
+ if (req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: invalid chunk list\n",
__func__);
return -EIO;
@@ -497,30 +512,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient.
*/
- if (wtype == rpcrdma_noch)
- wtype = rpcrdma_replych;
+ if (req->rl_wtype == rpcrdma_noch)
+ req->rl_wtype = rpcrdma_replych;
}
}
- /*
- * Marshal chunks. This routine will return the header length
- * consumed by marshaling.
- */
- if (rtype != rpcrdma_noch) {
- hdrlen = rpcrdma_create_chunks(rqst,
- &rqst->rq_snd_buf, headerp, rtype);
- wtype = rtype; /* simplify dprintk */
-
- } else if (wtype != rpcrdma_noch) {
- hdrlen = rpcrdma_create_chunks(rqst,
- &rqst->rq_rcv_buf, headerp, wtype);
- }
+ hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
if (hdrlen < 0)
return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
- __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
+ __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey);
/*
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 8f92a61ee2df..e0110270d650 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -43,6 +43,7 @@
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
+#include <linux/highmem.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -435,6 +436,32 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
return ret;
}
+/*
+ * To avoid a separate RDMA READ just for a handful of zero bytes,
+ * RFC 5666 section 3.7 allows the client to omit the XDR zero pad
+ * in chunk lists.
+ */
+static void
+rdma_fix_xdr_pad(struct xdr_buf *buf)
+{
+ unsigned int page_len = buf->page_len;
+ unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len;
+ unsigned int offset, pg_no;
+ char *p;
+
+ if (size == 0)
+ return;
+
+ pg_no = page_len >> PAGE_SHIFT;
+ offset = page_len & ~PAGE_MASK;
+ p = page_address(buf->pages[pg_no]);
+ memset(p + offset, 0, size);
+
+ buf->page_len += size;
+ buf->buflen += size;
+ buf->len += size;
+}
+
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
@@ -449,6 +476,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
+ rdma_fix_xdr_pad(&head->arg);
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 49fd21a5c215..9f1b50689c0f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -192,6 +192,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
xdr_sge_no++;
BUG_ON(xdr_sge_no > vec->count);
bc -= sge_bytes;
+ if (sge_no == xprt->sc_max_sge)
+ break;
}
/* Prepare WRITE WR */
@@ -209,7 +211,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
atomic_inc(&rdma_stat_write);
if (svc_rdma_send(xprt, &write_wr))
goto err;
- return 0;
+ return write_len - bc;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
@@ -225,7 +227,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
{
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len;
- int max_write;
u32 xdr_off;
int chunk_off;
int chunk_no;
@@ -239,8 +240,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
- max_write = xprt->sc_max_sge * PAGE_SIZE;
-
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
xfer_len && chunk_no < arg_ary->wc_nchunks;
@@ -260,23 +259,21 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
write_len);
chunk_off = 0;
while (write_len) {
- int this_write;
- this_write = min(write_len, max_write);
ret = send_write(xprt, rqstp,
ntohl(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
- this_write,
+ write_len,
vec);
- if (ret) {
+ if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
- chunk_off += this_write;
- xdr_off += this_write;
- xfer_len -= this_write;
- write_len -= this_write;
+ chunk_off += ret;
+ xdr_off += ret;
+ xfer_len -= ret;
+ write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
@@ -293,7 +290,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
{
u32 xfer_len = rqstp->rq_res.len;
int write_len;
- int max_write;
u32 xdr_off;
int chunk_no;
int chunk_off;
@@ -311,8 +307,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
- max_write = xprt->sc_max_sge * PAGE_SIZE;
-
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
@@ -330,24 +324,21 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
write_len);
chunk_off = 0;
while (write_len) {
- int this_write;
-
- this_write = min(write_len, max_write);
ret = send_write(xprt, rqstp,
ntohl(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
- this_write,
+ write_len,
vec);
- if (ret) {
+ if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
- chunk_off += this_write;
- xdr_off += this_write;
- xfer_len -= this_write;
- write_len -= this_write;
+ chunk_off += ret;
+ xdr_off += ret;
+ xfer_len -= ret;
+ write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e7323fbbd348..4e618808bc98 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,8 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_name = "rdma",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
- .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+ .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
+ .xcl_ident = XPRT_TRANSPORT_RDMA,
};
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
@@ -942,23 +943,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
- /*
- * XXX: This is a hack. We need a xx_request_qp interface
- * that will adjust the qp_attr's with a best-effort
- * number
- */
- qp_attr.cap.max_send_sge -= 2;
- qp_attr.cap.max_recv_sge -= 2;
- ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd,
- &qp_attr);
- if (ret) {
- dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
- goto errout;
- }
- newxprt->sc_max_sge = qp_attr.cap.max_send_sge;
- newxprt->sc_max_sge = qp_attr.cap.max_recv_sge;
- newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
- newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
+ dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
+ goto errout;
}
newxprt->sc_qp = newxprt->sc_cm_id->qp;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 66f91f0d071a..6a4615dd0261 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -205,7 +205,6 @@ xprt_rdma_connect_worker(struct work_struct *work)
struct rpc_xprt *xprt = &r_xprt->xprt;
int rc = 0;
- current->flags |= PF_FSTRANS;
xprt_clear_connected(xprt);
dprintk("RPC: %s: %sconnect\n", __func__,
@@ -216,7 +215,6 @@ xprt_rdma_connect_worker(struct work_struct *work)
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
- current->flags &= ~PF_FSTRANS;
}
/*
@@ -296,7 +294,6 @@ xprt_setup_rdma(struct xprt_create *args)
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
- xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
xprt->ops = &xprt_rdma_procs;
/*
@@ -382,6 +379,9 @@ xprt_setup_rdma(struct xprt_create *args)
new_ep->rep_xprt = xprt;
xprt_rdma_format_addresses(xprt);
+ xprt->max_payload = rpcrdma_max_payload(new_xprt);
+ dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
+ __func__, xprt->max_payload);
if (!try_module_get(THIS_MODULE))
goto out4;
@@ -412,7 +412,7 @@ xprt_rdma_close(struct rpc_xprt *xprt)
if (r_xprt->rx_ep.rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
- (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
}
static void
@@ -595,13 +595,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int rc;
+ int rc = 0;
- if (req->rl_niovs == 0) {
+ if (req->rl_niovs == 0)
rc = rpcrdma_marshal_req(rqst);
- if (rc < 0)
- goto failed_marshal;
- }
+ else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+ rc = rpcrdma_marshal_chunks(rqst, 0);
+ if (rc < 0)
+ goto failed_marshal;
if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 13dbd1c389ff..61c41298b4ea 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -61,6 +61,8 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
+static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
+
/*
* internal functions
*/
@@ -103,17 +105,6 @@ rpcrdma_run_tasklet(unsigned long data)
static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
-static inline void
-rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
- list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
- spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
- tasklet_schedule(&rpcrdma_tasklet_g);
-}
-
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
@@ -153,12 +144,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
if (wc->wr_id == 0ULL)
return;
if (wc->status != IB_WC_SUCCESS)
- return;
-
- if (wc->opcode == IB_WC_FAST_REG_MR)
- frmr->r.frmr.state = FRMR_IS_VALID;
- else if (wc->opcode == IB_WC_LOCAL_INV)
- frmr->r.frmr.state = FRMR_IS_INVALID;
+ frmr->r.frmr.fr_state = FRMR_IS_STALE;
}
static int
@@ -217,7 +203,7 @@ rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
}
static void
-rpcrdma_recvcq_process_wc(struct ib_wc *wc)
+rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
struct rpcrdma_rep *rep =
(struct rpcrdma_rep *)(unsigned long)wc->wr_id;
@@ -248,28 +234,38 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc)
}
out_schedule:
- rpcrdma_schedule_tasklet(rep);
+ list_add_tail(&rep->rr_list, sched_list);
}
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
+ struct list_head sched_list;
struct ib_wc *wcs;
int budget, count, rc;
+ unsigned long flags;
+ INIT_LIST_HEAD(&sched_list);
budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
do {
wcs = ep->rep_recv_wcs;
rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
if (rc <= 0)
- return rc;
+ goto out_schedule;
count = rc;
while (count-- > 0)
- rpcrdma_recvcq_process_wc(wcs++);
+ rpcrdma_recvcq_process_wc(wcs++, &sched_list);
} while (rc == RPCRDMA_POLLSIZE && --budget);
- return 0;
+ rc = 0;
+
+out_schedule:
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+ tasklet_schedule(&rpcrdma_tasklet_g);
+ return rc;
}
/*
@@ -310,6 +306,13 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
rpcrdma_recvcq_poll(cq, ep);
}
+static void
+rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
+{
+ rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
+ rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
+}
+
#ifdef RPC_DEBUG
static const char * const conn[] = {
"address resolved",
@@ -323,8 +326,16 @@ static const char * const conn[] = {
"rejected",
"established",
"disconnected",
- "device removal"
+ "device removal",
+ "multicast join",
+ "multicast error",
+ "address change",
+ "timewait exit",
};
+
+#define CONNECTION_MSG(status) \
+ ((status) < ARRAY_SIZE(conn) ? \
+ conn[(status)] : "unrecognized connection error")
#endif
static int
@@ -382,23 +393,18 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
connstate = -ENODEV;
connected:
- dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
- __func__,
- (event->event <= 11) ? conn[event->event] :
- "unknown connection error",
- &addr->sin_addr.s_addr,
- ntohs(addr->sin_port),
- ep, event->event);
atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
dprintk("RPC: %s: %sconnected\n",
__func__, connstate > 0 ? "" : "dis");
ep->rep_connected = connstate;
ep->rep_func(ep);
wake_up_all(&ep->rep_connect_wait);
- break;
+ /*FALLTHROUGH*/
default:
- dprintk("RPC: %s: unexpected CM event %d\n",
- __func__, event->event);
+ dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
+ __func__, &addr->sin_addr.s_addr,
+ ntohs(addr->sin_port), ep,
+ CONNECTION_MSG(event->event));
break;
}
@@ -558,12 +564,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
if (!ia->ri_id->device->alloc_fmr) {
dprintk("RPC: %s: MTHCAFMR registration "
"not supported by HCA\n", __func__);
-#if RPCRDMA_PERSISTENT_REGISTRATION
memreg = RPCRDMA_ALLPHYSICAL;
-#else
- rc = -ENOMEM;
- goto out2;
-#endif
}
}
@@ -578,20 +579,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
switch (memreg) {
case RPCRDMA_FRMR:
break;
-#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
mem_priv = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
goto register_setup;
-#endif
case RPCRDMA_MTHCAFMR:
if (ia->ri_have_dma_lkey)
break;
mem_priv = IB_ACCESS_LOCAL_WRITE;
-#if RPCRDMA_PERSISTENT_REGISTRATION
register_setup:
-#endif
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -613,6 +610,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
/* Else will do memory reg/dereg for each chunk */
ia->ri_memreg_strategy = memreg;
+ rwlock_init(&ia->ri_qplock);
return 0;
out2:
rdma_destroy_id(ia->ri_id);
@@ -826,10 +824,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
cancel_delayed_work_sync(&ep->rep_connect_worker);
if (ia->ri_id->qp) {
- rc = rpcrdma_ep_disconnect(ep, ia);
- if (rc)
- dprintk("RPC: %s: rpcrdma_ep_disconnect"
- " returned %i\n", __func__, rc);
+ rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
}
@@ -859,7 +854,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- struct rdma_cm_id *id;
+ struct rdma_cm_id *id, *old;
int rc = 0;
int retry_count = 0;
@@ -867,13 +862,12 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
struct rpcrdma_xprt *xprt;
retry:
dprintk("RPC: %s: reconnecting...\n", __func__);
- rc = rpcrdma_ep_disconnect(ep, ia);
- if (rc && rc != -ENOTCONN)
- dprintk("RPC: %s: rpcrdma_ep_disconnect"
- " status %i\n", __func__, rc);
- rpcrdma_clean_cq(ep->rep_attr.recv_cq);
- rpcrdma_clean_cq(ep->rep_attr.send_cq);
+ rpcrdma_ep_disconnect(ep, ia);
+ rpcrdma_flush_cqs(ep);
+
+ if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
+ rpcrdma_reset_frmrs(ia);
xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
id = rpcrdma_create_id(xprt, ia,
@@ -905,9 +899,14 @@ retry:
rc = -ENETUNREACH;
goto out;
}
- rdma_destroy_qp(ia->ri_id);
- rdma_destroy_id(ia->ri_id);
+
+ write_lock(&ia->ri_qplock);
+ old = ia->ri_id;
ia->ri_id = id;
+ write_unlock(&ia->ri_qplock);
+
+ rdma_destroy_qp(old);
+ rdma_destroy_id(old);
} else {
dprintk("RPC: %s: connecting...\n", __func__);
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
@@ -974,13 +973,12 @@ out:
* This call is not reentrant, and must not be made in parallel
* on the same endpoint.
*/
-int
+void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
- rpcrdma_clean_cq(ep->rep_attr.recv_cq);
- rpcrdma_clean_cq(ep->rep_attr.send_cq);
+ rpcrdma_flush_cqs(ep);
rc = rdma_disconnect(ia->ri_id);
if (!rc) {
/* returns without wait if not connected */
@@ -992,12 +990,93 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
ep->rep_connected = rc;
}
+}
+
+static int
+rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
+{
+ int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
+ struct ib_fmr_attr fmr_attr = {
+ .max_pages = RPCRDMA_MAX_DATA_SEGS,
+ .max_maps = 1,
+ .page_shift = PAGE_SHIFT
+ };
+ struct rpcrdma_mw *r;
+ int i, rc;
+
+ i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+ dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
+
+ while (i--) {
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (r == NULL)
+ return -ENOMEM;
+
+ r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
+ if (IS_ERR(r->r.fmr)) {
+ rc = PTR_ERR(r->r.fmr);
+ dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
+ __func__, rc);
+ goto out_free;
+ }
+
+ list_add(&r->mw_list, &buf->rb_mws);
+ list_add(&r->mw_all, &buf->rb_all);
+ }
+ return 0;
+
+out_free:
+ kfree(r);
+ return rc;
+}
+
+static int
+rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_frmr *f;
+ struct rpcrdma_mw *r;
+ int i, rc;
+
+ i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+ dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i);
+
+ while (i--) {
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (r == NULL)
+ return -ENOMEM;
+ f = &r->r.frmr;
+
+ f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(f->fr_mr)) {
+ rc = PTR_ERR(f->fr_mr);
+ dprintk("RPC: %s: ib_alloc_fast_reg_mr "
+ "failed %i\n", __func__, rc);
+ goto out_free;
+ }
+
+ f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(f->fr_pgl)) {
+ rc = PTR_ERR(f->fr_pgl);
+ dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
+ "failed %i\n", __func__, rc);
+
+ ib_dereg_mr(f->fr_mr);
+ goto out_free;
+ }
+
+ list_add(&r->mw_list, &buf->rb_mws);
+ list_add(&r->mw_all, &buf->rb_all);
+ }
+
+ return 0;
+
+out_free:
+ kfree(r);
return rc;
}
-/*
- * Initialize buffer memory
- */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
@@ -1005,7 +1084,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
char *p;
size_t len, rlen, wlen;
int i, rc;
- struct rpcrdma_mw *r;
buf->rb_max_requests = cdata->max_requests;
spin_lock_init(&buf->rb_lock);
@@ -1016,28 +1094,12 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* 2. arrays of struct rpcrdma_req to fill in pointers
* 3. array of struct rpcrdma_rep for replies
* 4. padding, if any
- * 5. mw's, fmr's or frmr's, if any
* Send/recv buffers in req/rep need to be registered
*/
-
len = buf->rb_max_requests *
(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
len += cdata->padding;
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
- sizeof(struct rpcrdma_mw);
- break;
- case RPCRDMA_MTHCAFMR:
- /* TBD we are perhaps overallocating here */
- len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
- sizeof(struct rpcrdma_mw);
- break;
- default:
- break;
- }
- /* allocate 1, 4 and 5 in one shot */
p = kzalloc(len, GFP_KERNEL);
if (p == NULL) {
dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
@@ -1064,51 +1126,17 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
p += cdata->padding;
INIT_LIST_HEAD(&buf->rb_mws);
- r = (struct rpcrdma_mw *)p;
+ INIT_LIST_HEAD(&buf->rb_all);
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
- for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
- r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- ia->ri_max_frmr_depth);
- if (IS_ERR(r->r.frmr.fr_mr)) {
- rc = PTR_ERR(r->r.frmr.fr_mr);
- dprintk("RPC: %s: ib_alloc_fast_reg_mr"
- " failed %i\n", __func__, rc);
- goto out;
- }
- r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
- ia->ri_id->device,
- ia->ri_max_frmr_depth);
- if (IS_ERR(r->r.frmr.fr_pgl)) {
- rc = PTR_ERR(r->r.frmr.fr_pgl);
- dprintk("RPC: %s: "
- "ib_alloc_fast_reg_page_list "
- "failed %i\n", __func__, rc);
-
- ib_dereg_mr(r->r.frmr.fr_mr);
- goto out;
- }
- list_add(&r->mw_list, &buf->rb_mws);
- ++r;
- }
+ rc = rpcrdma_init_frmrs(ia, buf);
+ if (rc)
+ goto out;
break;
case RPCRDMA_MTHCAFMR:
- /* TBD we are perhaps overallocating here */
- for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
- static struct ib_fmr_attr fa =
- { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
- r->r.fmr = ib_alloc_fmr(ia->ri_pd,
- IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
- &fa);
- if (IS_ERR(r->r.fmr)) {
- rc = PTR_ERR(r->r.fmr);
- dprintk("RPC: %s: ib_alloc_fmr"
- " failed %i\n", __func__, rc);
- goto out;
- }
- list_add(&r->mw_list, &buf->rb_mws);
- ++r;
- }
+ rc = rpcrdma_init_fmrs(ia, buf);
+ if (rc)
+ goto out;
break;
default:
break;
@@ -1176,24 +1204,57 @@ out:
return rc;
}
-/*
- * Unregister and destroy buffer memory. Need to deal with
- * partial initialization, so it's callable from failed create.
- * Must be called before destroying endpoint, as registrations
- * reference it.
- */
+static void
+rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mw *r;
+ int rc;
+
+ while (!list_empty(&buf->rb_all)) {
+ r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+ list_del(&r->mw_all);
+ list_del(&r->mw_list);
+
+ rc = ib_dealloc_fmr(r->r.fmr);
+ if (rc)
+ dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
+ __func__, rc);
+
+ kfree(r);
+ }
+}
+
+static void
+rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mw *r;
+ int rc;
+
+ while (!list_empty(&buf->rb_all)) {
+ r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+ list_del(&r->mw_all);
+ list_del(&r->mw_list);
+
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s: ib_dereg_mr failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+
+ kfree(r);
+ }
+}
+
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
- int rc, i;
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- struct rpcrdma_mw *r;
+ int i;
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
* 2. send mr memory (mr free, then kfree)
- * 3. padding (if any) [moved to rpcrdma_ep_destroy]
- * 4. arrays
+ * 3. MWs
*/
dprintk("RPC: %s: entering\n", __func__);
@@ -1212,34 +1273,217 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
}
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ rpcrdma_destroy_frmrs(buf);
+ break;
+ case RPCRDMA_MTHCAFMR:
+ rpcrdma_destroy_fmrs(buf);
+ break;
+ default:
+ break;
+ }
+
+ kfree(buf->rb_pool);
+}
+
+/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
+ * an unusable state. Find FRMRs in this state and dereg / reg
+ * each. FRMRs that are VALID and attached to an rpcrdma_req are
+ * also torn down.
+ *
+ * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_frmr_external().
+ */
+static void
+rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ia, struct rpcrdma_xprt, rx_ia);
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct list_head *pos;
+ struct rpcrdma_mw *r;
+ int rc;
+
+ list_for_each(pos, &buf->rb_all) {
+ r = list_entry(pos, struct rpcrdma_mw, mw_all);
+
+ if (r->r.frmr.fr_state == FRMR_IS_INVALID)
+ continue;
+
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s: ib_dereg_mr failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+
+ r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(r->r.frmr.fr_mr)) {
+ rc = PTR_ERR(r->r.frmr.fr_mr);
+ dprintk("RPC: %s: ib_alloc_fast_reg_mr"
+ " failed %i\n", __func__, rc);
+ continue;
+ }
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(r->r.frmr.fr_pgl)) {
+ rc = PTR_ERR(r->r.frmr.fr_pgl);
+ dprintk("RPC: %s: "
+ "ib_alloc_fast_reg_page_list "
+ "failed %i\n", __func__, rc);
+
+ ib_dereg_mr(r->r.frmr.fr_mr);
+ continue;
+ }
+ r->r.frmr.fr_state = FRMR_IS_INVALID;
+ }
+}
+
+/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
+ * some req segments uninitialized.
+ */
+static void
+rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
+{
+ if (*mw) {
+ list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
+ *mw = NULL;
+ }
+}
+
+/* Cycle mw's back in reverse order, and "spin" them.
+ * This delays and scrambles reuse as much as possible.
+ */
+static void
+rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mr_seg *seg = req->rl_segments;
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int i;
+
+ for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
+ rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
+ rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
+}
+
+static void
+rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+{
+ buf->rb_send_bufs[--buf->rb_send_index] = req;
+ req->rl_niovs = 0;
+ if (req->rl_reply) {
+ buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
+ req->rl_reply->rr_func = NULL;
+ req->rl_reply = NULL;
+ }
+}
+
+/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
+ * Redo only the ib_post_send().
+ */
+static void
+rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ia, struct rpcrdma_xprt, rx_ia);
+ struct ib_send_wr invalidate_wr, *bad_wr;
+ int rc;
+
+ dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
+
+ /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
+ r->r.frmr.fr_state = FRMR_IS_INVALID;
+
+ memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+ invalidate_wr.wr_id = (unsigned long)(void *)r;
+ invalidate_wr.opcode = IB_WR_LOCAL_INV;
+ invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+
+ dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
+ __func__, r, r->r.frmr.fr_mr->rkey);
+
+ read_lock(&ia->ri_qplock);
+ rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+ read_unlock(&ia->ri_qplock);
+ if (rc) {
+ /* Force rpcrdma_buffer_get() to retry */
+ r->r.frmr.fr_state = FRMR_IS_STALE;
+ dprintk("RPC: %s: ib_post_send failed, %i\n",
+ __func__, rc);
+ }
+}
+
+static void
+rpcrdma_retry_flushed_linv(struct list_head *stale,
+ struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+ struct list_head *pos;
+ struct rpcrdma_mw *r;
+ unsigned long flags;
+
+ list_for_each(pos, stale) {
+ r = list_entry(pos, struct rpcrdma_mw, mw_list);
+ rpcrdma_retry_local_inv(r, ia);
+ }
+
+ spin_lock_irqsave(&buf->rb_lock, flags);
+ list_splice_tail(stale, &buf->rb_mws);
+ spin_unlock_irqrestore(&buf->rb_lock, flags);
+}
+
+static struct rpcrdma_req *
+rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
+ struct list_head *stale)
+{
+ struct rpcrdma_mw *r;
+ int i;
+
+ i = RPCRDMA_MAX_SEGS - 1;
while (!list_empty(&buf->rb_mws)) {
r = list_entry(buf->rb_mws.next,
- struct rpcrdma_mw, mw_list);
+ struct rpcrdma_mw, mw_list);
list_del(&r->mw_list);
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- rc = ib_dereg_mr(r->r.frmr.fr_mr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dereg_mr"
- " failed %i\n",
- __func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
- break;
- case RPCRDMA_MTHCAFMR:
- rc = ib_dealloc_fmr(r->r.fmr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_fmr"
- " failed %i\n",
- __func__, rc);
- break;
- default:
- break;
+ if (r->r.frmr.fr_state == FRMR_IS_STALE) {
+ list_add(&r->mw_list, stale);
+ continue;
}
+ req->rl_segments[i].mr_chunk.rl_mw = r;
+ if (unlikely(i-- == 0))
+ return req; /* Success */
}
- kfree(buf->rb_pool);
+ /* Not enough entries on rb_mws for this req */
+ rpcrdma_buffer_put_sendbuf(req, buf);
+ rpcrdma_buffer_put_mrs(req, buf);
+ return NULL;
+}
+
+static struct rpcrdma_req *
+rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mw *r;
+ int i;
+
+ i = RPCRDMA_MAX_SEGS - 1;
+ while (!list_empty(&buf->rb_mws)) {
+ r = list_entry(buf->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ req->rl_segments[i].mr_chunk.rl_mw = r;
+ if (unlikely(i-- == 0))
+ return req; /* Success */
+ }
+
+ /* Not enough entries on rb_mws for this req */
+ rpcrdma_buffer_put_sendbuf(req, buf);
+ rpcrdma_buffer_put_mrs(req, buf);
+ return NULL;
}
/*
@@ -1254,10 +1498,10 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
+ struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
+ struct list_head stale;
struct rpcrdma_req *req;
unsigned long flags;
- int i;
- struct rpcrdma_mw *r;
spin_lock_irqsave(&buffers->rb_lock, flags);
if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1277,16 +1521,21 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
}
buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
- if (!list_empty(&buffers->rb_mws)) {
- i = RPCRDMA_MAX_SEGS - 1;
- do {
- r = list_entry(buffers->rb_mws.next,
- struct rpcrdma_mw, mw_list);
- list_del(&r->mw_list);
- req->rl_segments[i].mr_chunk.rl_mw = r;
- } while (--i >= 0);
+
+ INIT_LIST_HEAD(&stale);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
+ break;
+ case RPCRDMA_MTHCAFMR:
+ req = rpcrdma_buffer_get_fmrs(req, buffers);
+ break;
+ default:
+ break;
}
spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ if (!list_empty(&stale))
+ rpcrdma_retry_flushed_linv(&stale, buffers);
return req;
}
@@ -1299,34 +1548,14 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
{
struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
- int i;
unsigned long flags;
spin_lock_irqsave(&buffers->rb_lock, flags);
- buffers->rb_send_bufs[--buffers->rb_send_index] = req;
- req->rl_niovs = 0;
- if (req->rl_reply) {
- buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
- req->rl_reply->rr_func = NULL;
- req->rl_reply = NULL;
- }
+ rpcrdma_buffer_put_sendbuf(req, buffers);
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR:
- /*
- * Cycle mw's back in reverse order, and "spin" them.
- * This delays and scrambles reuse as much as possible.
- */
- i = 1;
- do {
- struct rpcrdma_mw **mw;
- mw = &req->rl_segments[i].mr_chunk.rl_mw;
- list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
- *mw = NULL;
- } while (++i < RPCRDMA_MAX_SEGS);
- list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
- &buffers->rb_mws);
- req->rl_segments[0].mr_chunk.rl_mw = NULL;
+ rpcrdma_buffer_put_mrs(req, buffers);
break;
default:
break;
@@ -1388,6 +1617,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
*/
iov->addr = ib_dma_map_single(ia->ri_id->device,
va, len, DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
+ return -ENOMEM;
+
iov->length = len;
if (ia->ri_have_dma_lkey) {
@@ -1483,8 +1715,10 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_mr_seg *seg1 = seg;
- struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
-
+ struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
+ struct rpcrdma_frmr *frmr = &mw->r.frmr;
+ struct ib_mr *mr = frmr->fr_mr;
+ struct ib_send_wr fastreg_wr, *bad_wr;
u8 key;
int len, pageoff;
int i, rc;
@@ -1502,8 +1736,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
- seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
- page_list[page_no++] = pa;
+ frmr->fr_pgl->page_list[page_no++] = pa;
pa += PAGE_SIZE;
}
len += seg->mr_len;
@@ -1515,65 +1748,51 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
break;
}
dprintk("RPC: %s: Using frmr %p to map %d segments\n",
- __func__, seg1->mr_chunk.rl_mw, i);
-
- if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
- dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
- __func__,
- seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
- /* Invalidate before using. */
- memset(&invalidate_wr, 0, sizeof invalidate_wr);
- invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
- invalidate_wr.next = &frmr_wr;
- invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.send_flags = IB_SEND_SIGNALED;
- invalidate_wr.ex.invalidate_rkey =
- seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
- DECR_CQCOUNT(&r_xprt->rx_ep);
- post_wr = &invalidate_wr;
- } else
- post_wr = &frmr_wr;
-
- /* Prepare FRMR WR */
- memset(&frmr_wr, 0, sizeof frmr_wr);
- frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
- frmr_wr.opcode = IB_WR_FAST_REG_MR;
- frmr_wr.send_flags = IB_SEND_SIGNALED;
- frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
- frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
- frmr_wr.wr.fast_reg.page_list_len = page_no;
- frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
- if (frmr_wr.wr.fast_reg.length < len) {
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- return -EIO;
+ __func__, mw, i);
+
+ frmr->fr_state = FRMR_IS_VALID;
+
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.wr_id = (unsigned long)(void *)mw;
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
+ fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
+ fastreg_wr.wr.fast_reg.page_list_len = page_no;
+ fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
+ if (fastreg_wr.wr.fast_reg.length < len) {
+ rc = -EIO;
+ goto out_err;
}
/* Bump the key */
- key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+ key = (u8)(mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(mr, ++key);
- frmr_wr.wr.fast_reg.access_flags = (writing ?
+ fastreg_wr.wr.fast_reg.access_flags = (writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ);
- frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ fastreg_wr.wr.fast_reg.rkey = mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
-
+ rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
if (rc) {
dprintk("RPC: %s: failed ib_post_send for register,"
" status %i\n", __func__, rc);
- while (i--)
- rpcrdma_unmap_one(ia, --seg);
+ ib_update_fast_reg_key(mr, --key);
+ goto out_err;
} else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ seg1->mr_rkey = mr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
seg1->mr_len = len;
}
*nsegs = i;
+ return 0;
+out_err:
+ frmr->fr_state = FRMR_IS_INVALID;
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
return rc;
}
@@ -1585,20 +1804,25 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
struct ib_send_wr invalidate_wr, *bad_wr;
int rc;
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
+ seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
memset(&invalidate_wr, 0, sizeof invalidate_wr);
invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.send_flags = IB_SEND_SIGNALED;
invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
+ read_lock(&ia->ri_qplock);
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
- if (rc)
+ read_unlock(&ia->ri_qplock);
+ if (rc) {
+ /* Force rpcrdma_buffer_get() to retry */
+ seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
dprintk("RPC: %s: failed ib_post_send for invalidate,"
" status %i\n", __func__, rc);
+ }
return rc;
}
@@ -1656,8 +1880,10 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
rc = ib_unmap_fmr(&l);
+ read_lock(&ia->ri_qplock);
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(ia, seg++);
+ read_unlock(&ia->ri_qplock);
if (rc)
dprintk("RPC: %s: failed ib_unmap_fmr,"
" status %i\n", __func__, rc);
@@ -1673,7 +1899,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
switch (ia->ri_memreg_strategy) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
rpcrdma_map_one(ia, seg, writing);
seg->mr_rkey = ia->ri_bind_mem->rkey;
@@ -1681,7 +1906,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
seg->mr_nsegs = 1;
nsegs = 1;
break;
-#endif
/* Registration using frmr registration */
case RPCRDMA_FRMR:
@@ -1711,11 +1935,11 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
switch (ia->ri_memreg_strategy) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
+ read_lock(&ia->ri_qplock);
rpcrdma_unmap_one(ia, seg);
+ read_unlock(&ia->ri_qplock);
break;
-#endif
case RPCRDMA_FRMR:
rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
@@ -1809,3 +2033,44 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
rc);
return rc;
}
+
+/* Physical mapping means one Read/Write list entry per-page.
+ * All list entries must fit within an inline buffer
+ *
+ * NB: The server must return a Write list for NFS READ,
+ * which has the same constraint. Factor in the inline
+ * rsize as well.
+ */
+static size_t
+rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ unsigned int inline_size, pages;
+
+ inline_size = min_t(unsigned int,
+ cdata->inline_wsize, cdata->inline_rsize);
+ inline_size -= RPCRDMA_HDRLEN_MIN;
+ pages = inline_size / sizeof(struct rpcrdma_segment);
+ return pages << PAGE_SHIFT;
+}
+
+static size_t
+rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
+{
+ return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
+}
+
+size_t
+rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
+{
+ size_t result;
+
+ switch (r_xprt->rx_ia.ri_memreg_strategy) {
+ case RPCRDMA_ALLPHYSICAL:
+ result = rpcrdma_physical_max_payload(r_xprt);
+ break;
+ default:
+ result = rpcrdma_mr_max_payload(r_xprt);
+ }
+ return result;
+}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 89e7cd479705..ac7fc9a31342 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,7 @@
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@@ -59,6 +60,7 @@
* Interface Adapter -- one per transport instance
*/
struct rpcrdma_ia {
+ rwlock_t ri_qplock;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
@@ -98,6 +100,14 @@ struct rpcrdma_ep {
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
+enum rpcrdma_chunktype {
+ rpcrdma_noch = 0,
+ rpcrdma_readch,
+ rpcrdma_areadch,
+ rpcrdma_writech,
+ rpcrdma_replych
+};
+
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asychronously. It needs several pieces of
@@ -137,6 +147,40 @@ struct rpcrdma_rep {
};
/*
+ * struct rpcrdma_mw - external memory region metadata
+ *
+ * An external memory region is any buffer or page that is registered
+ * on the fly (ie, not pre-registered).
+ *
+ * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
+ * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
+ * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
+ * track of registration metadata while each RPC is pending.
+ * rpcrdma_deregister_external() uses this metadata to unmap and
+ * release these resources when an RPC is complete.
+ */
+enum rpcrdma_frmr_state {
+ FRMR_IS_INVALID, /* ready to be used */
+ FRMR_IS_VALID, /* in use */
+ FRMR_IS_STALE, /* failed completion */
+};
+
+struct rpcrdma_frmr {
+ struct ib_fast_reg_page_list *fr_pgl;
+ struct ib_mr *fr_mr;
+ enum rpcrdma_frmr_state fr_state;
+};
+
+struct rpcrdma_mw {
+ union {
+ struct ib_fmr *fmr;
+ struct rpcrdma_frmr frmr;
+ } r;
+ struct list_head mw_list;
+ struct list_head mw_all;
+};
+
+/*
* struct rpcrdma_req -- structure central to the request/reply sequence.
*
* N of these are associated with a transport instance, and stored in
@@ -163,17 +207,7 @@ struct rpcrdma_rep {
struct rpcrdma_mr_seg { /* chunk descriptors */
union { /* chunk memory handles */
struct ib_mr *rl_mr; /* if registered directly */
- struct rpcrdma_mw { /* if registered from region */
- union {
- struct ib_fmr *fmr;
- struct {
- struct ib_fast_reg_page_list *fr_pgl;
- struct ib_mr *fr_mr;
- enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
- } frmr;
- } r;
- struct list_head mw_list;
- } *rl_mw;
+ struct rpcrdma_mw *rl_mw; /* if registered from region */
} mr_chunk;
u64 mr_base; /* registration result */
u32 mr_rkey; /* registration result */
@@ -191,6 +225,7 @@ struct rpcrdma_req {
unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */
+ enum rpcrdma_chunktype rl_rtype, rl_wtype;
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@@ -214,6 +249,7 @@ struct rpcrdma_buffer {
atomic_t rb_credits; /* most recent server credits */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
+ struct list_head rb_all;
int rb_send_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
@@ -306,7 +342,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
-int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
@@ -346,7 +382,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
+ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
int rpcrdma_marshal_req(struct rpc_rqst *);
+size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
@@ -355,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
+#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
+#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
+#else
+#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
+#endif
+
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index be8bbd5d65ec..3b305ab17afe 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -399,13 +399,13 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen,
return kernel_sendmsg(sock, &msg, NULL, 0, 0);
}
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy)
+static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
{
ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
int offset, size_t size, int flags);
struct page **ppage;
unsigned int remainder;
- int err, sent = 0;
+ int err;
remainder = xdr->page_len - base;
base += xdr->page_base;
@@ -424,15 +424,15 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
err = do_sendpage(sock, *ppage, base, len, flags);
if (remainder == 0 || err != len)
break;
- sent += err;
+ *sent_p += err;
ppage++;
base = 0;
}
- if (sent == 0)
- return err;
- if (err > 0)
- sent += err;
- return sent;
+ if (err > 0) {
+ *sent_p += err;
+ err = 0;
+ }
+ return err;
}
/**
@@ -443,12 +443,14 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @xdr: buffer containing this request
* @base: starting position in the buffer
* @zerocopy: true if it is safe to use sendpage()
+ * @sent_p: return the total number of bytes successfully queued for sending
*
*/
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
{
unsigned int remainder = xdr->len - base;
- int err, sent = 0;
+ int err = 0;
+ int sent = 0;
if (unlikely(!sock))
return -ENOTSOCK;
@@ -465,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
if (remainder == 0 || err != len)
goto out;
- sent += err;
+ *sent_p += err;
base = 0;
} else
base -= xdr->head[0].iov_len;
@@ -473,23 +475,23 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
- err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy);
- if (remainder == 0 || err != len)
+ err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
+ *sent_p += sent;
+ if (remainder == 0 || sent != len)
goto out;
- sent += err;
base = 0;
} else
base -= xdr->page_len;
if (base >= xdr->tail[0].iov_len)
- return sent;
+ return 0;
err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
out:
- if (sent == 0)
- return err;
- if (err > 0)
- sent += err;
- return sent;
+ if (err > 0) {
+ *sent_p += err;
+ err = 0;
+ }
+ return err;
}
static void xs_nospace_callback(struct rpc_task *task)
@@ -573,19 +575,20 @@ static int xs_local_send_request(struct rpc_task *task)
container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
int status;
+ int sent = 0;
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
- status = xs_sendpages(transport->sock, NULL, 0,
- xdr, req->rq_bytes_sent, true);
+ status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
+ true, &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - req->rq_bytes_sent, status);
- if (likely(status >= 0)) {
- req->rq_bytes_sent += status;
- req->rq_xmit_bytes_sent += status;
+ if (likely(sent > 0) || status == 0) {
+ req->rq_bytes_sent += sent;
+ req->rq_xmit_bytes_sent += sent;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_bytes_sent = 0;
return 0;
@@ -594,6 +597,7 @@ static int xs_local_send_request(struct rpc_task *task)
}
switch (status) {
+ case -ENOBUFS:
case -EAGAIN:
status = xs_nospace(task);
break;
@@ -625,6 +629,7 @@ static int xs_udp_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ int sent = 0;
int status;
xs_pktdump("packet data:",
@@ -633,22 +638,25 @@ static int xs_udp_send_request(struct rpc_task *task)
if (!xprt_bound(xprt))
return -ENOTCONN;
- status = xs_sendpages(transport->sock,
- xs_addr(xprt),
- xprt->addrlen, xdr,
- req->rq_bytes_sent, true);
+ status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
+ xdr, req->rq_bytes_sent, true, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
- if (status >= 0) {
- req->rq_xmit_bytes_sent += status;
- if (status >= req->rq_slen)
+ /* firewall is blocking us, don't return -EAGAIN or we end up looping */
+ if (status == -EPERM)
+ goto process_status;
+
+ if (sent > 0 || status == 0) {
+ req->rq_xmit_bytes_sent += sent;
+ if (sent >= req->rq_slen)
return 0;
/* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
}
+process_status:
switch (status) {
case -ENOTSOCK:
status = -ENOTCONN;
@@ -661,8 +669,10 @@ static int xs_udp_send_request(struct rpc_task *task)
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
case -ENETUNREACH:
+ case -ENOBUFS:
case -EPIPE:
case -ECONNREFUSED:
+ case -EPERM:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
@@ -711,6 +721,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
struct xdr_buf *xdr = &req->rq_snd_buf;
bool zerocopy = true;
int status;
+ int sent;
xs_encode_stream_record_marker(&req->rq_snd_buf);
@@ -728,26 +739,26 @@ static int xs_tcp_send_request(struct rpc_task *task)
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
while (1) {
- status = xs_sendpages(transport->sock,
- NULL, 0, xdr, req->rq_bytes_sent,
- zerocopy);
+ sent = 0;
+ status = xs_sendpages(transport->sock, NULL, 0, xdr,
+ req->rq_bytes_sent, zerocopy, &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
- if (unlikely(status < 0))
+ if (unlikely(sent == 0 && status < 0))
break;
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
- req->rq_bytes_sent += status;
- req->rq_xmit_bytes_sent += status;
+ req->rq_bytes_sent += sent;
+ req->rq_xmit_bytes_sent += sent;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_bytes_sent = 0;
return 0;
}
- if (status != 0)
+ if (sent != 0)
continue;
status = -EAGAIN;
break;
@@ -758,6 +769,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
status = -ENOTCONN;
/* Should we call xs_close() here? */
break;
+ case -ENOBUFS:
case -EAGAIN:
status = xs_nospace(task);
break;
@@ -842,6 +854,8 @@ static void xs_error_report(struct sock *sk)
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
+ if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state))
+ goto out;
xprt_wake_pending_tasks(xprt, err);
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -1743,13 +1757,29 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
unsigned short port = xs_get_srcport(transport);
unsigned short last;
+ /*
+ * If we are asking for any ephemeral port (i.e. port == 0 &&
+ * transport->xprt.resvport == 0), don't bind. Let the local
+ * port selection happen implicitly when the socket is used
+ * (for example at connect time).
+ *
+ * This ensures that we can continue to establish TCP
+ * connections even when all local ephemeral ports are already
+ * a part of some TCP connection. This makes no difference
+ * for UDP sockets, but also doens't harm them.
+ *
+ * If we're asking for any reserved port (i.e. port == 0 &&
+ * transport->xprt.resvport == 1) xs_get_srcport above will
+ * ensure that port is non-zero and we will bind as needed.
+ */
+ if (port == 0)
+ return 0;
+
memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
do {
rpc_set_port((struct sockaddr *)&myaddr, port);
err = kernel_bind(sock, (struct sockaddr *)&myaddr,
transport->xprt.addrlen);
- if (port == 0)
- break;
if (err == 0) {
transport->srcport = port;
break;
@@ -1924,8 +1954,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
struct socket *sock;
int status = -EIO;
- current->flags |= PF_FSTRANS;
-
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
status = __sock_create(xprt->xprt_net, AF_LOCAL,
SOCK_STREAM, 0, &sock, 1);
@@ -1946,6 +1974,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
dprintk("RPC: xprt %p connected to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
xprt_set_connected(xprt);
+ case -ENOBUFS:
break;
case -ENOENT:
dprintk("RPC: xprt %p: socket %s does not exist\n",
@@ -1964,7 +1993,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
- current->flags &= ~PF_FSTRANS;
return status;
}
@@ -2067,8 +2095,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct socket *sock = transport->sock;
int status = -EIO;
- current->flags |= PF_FSTRANS;
-
/* Start by resetting any existing state */
xs_reset_transport(transport);
sock = xs_create_sock(xprt, transport,
@@ -2088,7 +2114,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
- current->flags &= ~PF_FSTRANS;
}
/*
@@ -2225,8 +2250,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
struct rpc_xprt *xprt = &transport->xprt;
int status = -EIO;
- current->flags |= PF_FSTRANS;
-
if (!sock) {
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = xs_create_sock(xprt, transport,
@@ -2241,7 +2264,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
&xprt->state);
/* "close" the socket, preserving the local port */
+ set_bit(XPRT_CONNECTION_REUSE, &xprt->state);
xs_tcp_reuse_connection(transport);
+ clear_bit(XPRT_CONNECTION_REUSE, &xprt->state);
if (abort_and_exit)
goto out_eagain;
@@ -2272,7 +2297,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EINPROGRESS:
case -EALREADY:
xprt_clear_connecting(xprt);
- current->flags &= ~PF_FSTRANS;
return;
case -EINVAL:
/* Happens, for instance, if the user specified a link
@@ -2281,6 +2305,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -ECONNREFUSED:
case -ECONNRESET:
case -ENETUNREACH:
+ case -ENOBUFS:
/* retry with existing socket, after a delay */
goto out;
}
@@ -2289,7 +2314,6 @@ out_eagain:
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
- current->flags &= ~PF_FSTRANS;
}
/**
@@ -3054,12 +3078,12 @@ static int param_set_uint_minmax(const char *val,
const struct kernel_param *kp,
unsigned int min, unsigned int max)
{
- unsigned long num;
+ unsigned int num;
int ret;
if (!val)
return -EINVAL;
- ret = strict_strtoul(val, 0, &num);
+ ret = kstrtouint(val, 0, &num);
if (ret == -EINVAL || num < min || num > max)
return -EINVAL;
*((unsigned int *)kp->arg) = num;
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index a080c66d819a..b8a13caad59a 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_TIPC) := tipc.o
tipc-y += addr.o bcast.o bearer.o config.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o name_table.o net.o \
- netlink.o node.o node_subscr.o port.o ref.o \
+ netlink.o node.o node_subscr.o \
socket.o log.o eth_media.o server.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 55c6c9d3e1ce..b8670bf262e2 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -1,7 +1,7 @@
/*
* net/tipc/bcast.c: TIPC broadcast code
*
- * Copyright (c) 2004-2006, Ericsson AB
+ * Copyright (c) 2004-2006, 2014, Ericsson AB
* Copyright (c) 2004, Intel Corporation.
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
@@ -37,7 +37,8 @@
#include "core.h"
#include "link.h"
-#include "port.h"
+#include "socket.h"
+#include "msg.h"
#include "bcast.h"
#include "name_distr.h"
@@ -138,6 +139,11 @@ static void tipc_bclink_unlock(void)
tipc_link_reset_all(node);
}
+uint tipc_bclink_get_mtu(void)
+{
+ return MAX_PKT_DEFAULT_MCAST;
+}
+
void tipc_bclink_set_flags(unsigned int flags)
{
bclink->flags |= flags;
@@ -220,6 +226,17 @@ static void bclink_retransmit_pkt(u32 after, u32 to)
}
/**
+ * tipc_bclink_wakeup_users - wake up pending users
+ *
+ * Called with no locks taken
+ */
+void tipc_bclink_wakeup_users(void)
+{
+ while (skb_queue_len(&bclink->link.waiting_sks))
+ tipc_sk_rcv(skb_dequeue(&bclink->link.waiting_sks));
+}
+
+/**
* tipc_bclink_acknowledge - handle acknowledgement of broadcast packets
* @n_ptr: node that sent acknowledgement info
* @acked: broadcast sequence # that has been acknowledged
@@ -293,8 +310,9 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
tipc_link_push_queue(bcl);
bclink_set_last_sent();
}
- if (unlikely(released && !list_empty(&bcl->waiting_ports)))
- tipc_link_wakeup_ports(bcl, 0);
+ if (unlikely(released && !skb_queue_empty(&bcl->waiting_sks)))
+ n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS;
+
exit:
tipc_bclink_unlock();
}
@@ -382,30 +400,50 @@ static void bclink_peek_nack(struct tipc_msg *msg)
tipc_node_unlock(n_ptr);
}
-/*
- * tipc_bclink_xmit - broadcast a packet to all nodes in cluster
+/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster
+ * and to identified node local sockets
+ * @buf: chain of buffers containing message
+ * Consumes the buffer chain, except when returning -ELINKCONG
+ * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
*/
int tipc_bclink_xmit(struct sk_buff *buf)
{
- int res;
-
- tipc_bclink_lock();
+ int rc = 0;
+ int bc = 0;
+ struct sk_buff *clbuf;
- if (!bclink->bcast_nodes.count) {
- res = msg_data_sz(buf_msg(buf));
- kfree_skb(buf);
- goto exit;
+ /* Prepare clone of message for local node */
+ clbuf = tipc_msg_reassemble(buf);
+ if (unlikely(!clbuf)) {
+ kfree_skb_list(buf);
+ return -EHOSTUNREACH;
}
- res = __tipc_link_xmit(bcl, buf);
- if (likely(res >= 0)) {
- bclink_set_last_sent();
- bcl->stats.queue_sz_counts++;
- bcl->stats.accu_queue_sz += bcl->out_queue_size;
+ /* Broadcast to all other nodes */
+ if (likely(bclink)) {
+ tipc_bclink_lock();
+ if (likely(bclink->bcast_nodes.count)) {
+ rc = __tipc_link_xmit(bcl, buf);
+ if (likely(!rc)) {
+ bclink_set_last_sent();
+ bcl->stats.queue_sz_counts++;
+ bcl->stats.accu_queue_sz += bcl->out_queue_size;
+ }
+ bc = 1;
+ }
+ tipc_bclink_unlock();
}
-exit:
- tipc_bclink_unlock();
- return res;
+
+ if (unlikely(!bc))
+ kfree_skb_list(buf);
+
+ /* Deliver message clone */
+ if (likely(!rc))
+ tipc_sk_mcast_rcv(clbuf);
+ else
+ kfree_skb(clbuf);
+
+ return rc;
}
/**
@@ -443,7 +481,7 @@ void tipc_bclink_rcv(struct sk_buff *buf)
struct tipc_node *node;
u32 next_in;
u32 seqno;
- int deferred;
+ int deferred = 0;
/* Screen out unwanted broadcast messages */
@@ -494,7 +532,7 @@ receive:
tipc_bclink_unlock();
tipc_node_unlock(node);
if (likely(msg_mcast(msg)))
- tipc_port_mcast_rcv(buf, NULL);
+ tipc_sk_mcast_rcv(buf);
else
kfree_skb(buf);
} else if (msg_user(msg) == MSG_BUNDLER) {
@@ -573,8 +611,7 @@ receive:
node->bclink.deferred_size += deferred;
bclink_update_last_sent(node, seqno);
buf = NULL;
- } else
- deferred = 0;
+ }
tipc_bclink_lock();
@@ -611,6 +648,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
struct tipc_media_addr *unused2)
{
int bp_index;
+ struct tipc_msg *msg = buf_msg(buf);
/* Prepare broadcast link message for reliable transmission,
* if first time trying to send it;
@@ -618,10 +656,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
* since they are sent in an unreliable manner and don't need it
*/
if (likely(!msg_non_seq(buf_msg(buf)))) {
- struct tipc_msg *msg;
-
bcbuf_set_acks(buf, bclink->bcast_nodes.count);
- msg = buf_msg(buf);
msg_set_non_seq(msg, 1);
msg_set_mc_netid(msg, tipc_net_id);
bcl->stats.sent_info++;
@@ -638,12 +673,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
- struct tipc_bearer *b = p;
+ struct tipc_bearer *bp[2] = {p, s};
+ struct tipc_bearer *b = bp[msg_link_selector(msg)];
struct sk_buff *tbuf;
if (!p)
break; /* No more bearers to try */
-
+ if (!b)
+ b = p;
tipc_nmap_diff(&bcbearer->remains, &b->nodes,
&bcbearer->remains_new);
if (bcbearer->remains_new.count == bcbearer->remains.count)
@@ -660,13 +697,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
tipc_bearer_send(b->identity, tbuf, &b->bcast_addr);
kfree_skb(tbuf); /* Bearer keeps a clone */
}
-
- /* Swap bearers for next packet */
- if (s) {
- bcbearer->bpairs[bp_index].primary = s;
- bcbearer->bpairs[bp_index].secondary = p;
- }
-
if (bcbearer->remains_new.count == 0)
break; /* All targets reached */
@@ -821,9 +851,10 @@ int tipc_bclink_init(void)
sprintf(bcbearer->media.name, "tipc-broadcast");
spin_lock_init(&bclink->lock);
- INIT_LIST_HEAD(&bcl->waiting_ports);
+ __skb_queue_head_init(&bcl->waiting_sks);
bcl->next_out_no = 1;
spin_lock_init(&bclink->node.lock);
+ __skb_queue_head_init(&bclink->node.waiting_sks);
bcl->owner = &bclink->node;
bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 00330c45df3e..e7b0f85a82bc 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -1,7 +1,7 @@
/*
* net/tipc/bcast.h: Include file for TIPC broadcast code
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2014, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -89,7 +89,6 @@ void tipc_bclink_add_node(u32 addr);
void tipc_bclink_remove_node(u32 addr);
struct tipc_node *tipc_bclink_retransmit_to(void);
void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
-int tipc_bclink_xmit(struct sk_buff *buf);
void tipc_bclink_rcv(struct sk_buff *buf);
u32 tipc_bclink_get_last_sent(void);
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
@@ -98,5 +97,7 @@ int tipc_bclink_stats(char *stats_buf, const u32 buf_size);
int tipc_bclink_reset_stats(void);
int tipc_bclink_set_queue_limits(u32 limit);
void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action);
-
+uint tipc_bclink_get_mtu(void);
+int tipc_bclink_xmit(struct sk_buff *buf);
+void tipc_bclink_wakeup_users(void);
#endif
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 2b42403ad33a..876f4c6a2631 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -35,7 +35,7 @@
*/
#include "core.h"
-#include "port.h"
+#include "socket.h"
#include "name_table.h"
#include "config.h"
#include "server.h"
@@ -266,7 +266,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
rep_tlv_buf = tipc_media_get_names();
break;
case TIPC_CMD_SHOW_PORTS:
- rep_tlv_buf = tipc_port_get_ports();
+ rep_tlv_buf = tipc_sk_socks_show();
break;
case TIPC_CMD_SHOW_STATS:
rep_tlv_buf = tipc_show_stats();
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 676d18015dd8..a5737b8407dd 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -35,11 +35,10 @@
*/
#include "core.h"
-#include "ref.h"
#include "name_table.h"
#include "subscr.h"
#include "config.h"
-#include "port.h"
+#include "socket.h"
#include <linux/module.h>
@@ -85,7 +84,7 @@ static void tipc_core_stop(void)
tipc_netlink_stop();
tipc_subscr_stop();
tipc_nametbl_stop();
- tipc_ref_table_stop();
+ tipc_sk_ref_table_stop();
tipc_socket_stop();
tipc_unregister_sysctl();
}
@@ -99,7 +98,7 @@ static int tipc_core_start(void)
get_random_bytes(&tipc_random, sizeof(tipc_random));
- err = tipc_ref_table_init(tipc_max_ports, tipc_random);
+ err = tipc_sk_ref_table_init(tipc_max_ports, tipc_random);
if (err)
goto out_reftbl;
@@ -139,7 +138,7 @@ out_socket:
out_netlink:
tipc_nametbl_stop();
out_nametbl:
- tipc_ref_table_stop();
+ tipc_sk_ref_table_stop();
out_reftbl:
return err;
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index bb26ed1ee966..f773b148722f 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -81,6 +81,7 @@ extern u32 tipc_own_addr __read_mostly;
extern int tipc_max_ports __read_mostly;
extern int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
+extern int sysctl_tipc_named_timeout __read_mostly;
/*
* Other global variables
@@ -187,8 +188,11 @@ static inline void k_term_timer(struct timer_list *timer)
struct tipc_skb_cb {
void *handle;
- bool deferred;
struct sk_buff *tail;
+ bool deferred;
+ bool wakeup_pending;
+ u16 chain_sz;
+ u16 chain_imp;
};
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ad2c57f5868d..65410e18b8a6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -36,7 +36,6 @@
#include "core.h"
#include "link.h"
-#include "port.h"
#include "socket.h"
#include "name_distr.h"
#include "discover.h"
@@ -82,15 +81,13 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf);
static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr,
struct sk_buff **buf);
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance);
-static int tipc_link_iovec_long_xmit(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destnode);
static void link_state_event(struct tipc_link *l_ptr, u32 event);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
-static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf);
static void tipc_link_sync_xmit(struct tipc_link *l);
static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
+static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf);
+static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf);
/*
* Simple link routines
@@ -277,7 +274,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
link_init_max_pkt(l_ptr);
l_ptr->next_out_no = 1;
- INIT_LIST_HEAD(&l_ptr->waiting_ports);
+ __skb_queue_head_init(&l_ptr->waiting_sks);
link_reset_statistics(l_ptr);
@@ -324,62 +321,47 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down)
}
/**
- * link_schedule_port - schedule port for deferred sending
- * @l_ptr: pointer to link
- * @origport: reference to sending port
- * @sz: amount of data to be sent
- *
- * Schedules port for renewed sending of messages after link congestion
- * has abated.
+ * link_schedule_user - schedule user for wakeup after congestion
+ * @link: congested link
+ * @oport: sending port
+ * @chain_sz: size of buffer chain that was attempted sent
+ * @imp: importance of message attempted sent
+ * Create pseudo msg to send back to user when congestion abates
*/
-static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz)
+static bool link_schedule_user(struct tipc_link *link, u32 oport,
+ uint chain_sz, uint imp)
{
- struct tipc_port *p_ptr;
+ struct sk_buff *buf;
- spin_lock_bh(&tipc_port_list_lock);
- p_ptr = tipc_port_lock(origport);
- if (p_ptr) {
- if (!list_empty(&p_ptr->wait_list))
- goto exit;
- p_ptr->congested = 1;
- p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
- list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
- l_ptr->stats.link_congs++;
-exit:
- tipc_port_unlock(p_ptr);
- }
- spin_unlock_bh(&tipc_port_list_lock);
- return -ELINKCONG;
+ buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, tipc_own_addr,
+ tipc_own_addr, oport, 0, 0);
+ if (!buf)
+ return false;
+ TIPC_SKB_CB(buf)->chain_sz = chain_sz;
+ TIPC_SKB_CB(buf)->chain_imp = imp;
+ __skb_queue_tail(&link->waiting_sks, buf);
+ link->stats.link_congs++;
+ return true;
}
-void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all)
+/**
+ * link_prepare_wakeup - prepare users for wakeup after congestion
+ * @link: congested link
+ * Move a number of waiting users, as permitted by available space in
+ * the send queue, from link wait queue to node wait queue for wakeup
+ */
+static void link_prepare_wakeup(struct tipc_link *link)
{
- struct tipc_port *p_ptr;
- struct tipc_port *temp_p_ptr;
- int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size;
+ struct sk_buff_head *wq = &link->waiting_sks;
+ struct sk_buff *buf;
+ uint pend_qsz = link->out_queue_size;
- if (all)
- win = 100000;
- if (win <= 0)
- return;
- if (!spin_trylock_bh(&tipc_port_list_lock))
- return;
- if (link_congested(l_ptr))
- goto exit;
- list_for_each_entry_safe(p_ptr, temp_p_ptr, &l_ptr->waiting_ports,
- wait_list) {
- if (win <= 0)
+ for (buf = skb_peek(wq); buf; buf = skb_peek(wq)) {
+ if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(buf)->chain_imp])
break;
- list_del_init(&p_ptr->wait_list);
- spin_lock_bh(p_ptr->lock);
- p_ptr->congested = 0;
- tipc_port_wakeup(p_ptr);
- win -= p_ptr->waiting_pkts;
- spin_unlock_bh(p_ptr->lock);
+ pend_qsz += TIPC_SKB_CB(buf)->chain_sz;
+ __skb_queue_tail(&link->owner->waiting_sks, __skb_dequeue(wq));
}
-
-exit:
- spin_unlock_bh(&tipc_port_list_lock);
}
/**
@@ -421,6 +403,7 @@ void tipc_link_reset(struct tipc_link *l_ptr)
u32 prev_state = l_ptr->state;
u32 checkpoint = l_ptr->next_in_no;
int was_active_link = tipc_link_is_active(l_ptr);
+ struct tipc_node *owner = l_ptr->owner;
msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff));
@@ -448,9 +431,10 @@ void tipc_link_reset(struct tipc_link *l_ptr)
kfree_skb(l_ptr->proto_msg_queue);
l_ptr->proto_msg_queue = NULL;
kfree_skb_list(l_ptr->oldest_deferred_in);
- if (!list_empty(&l_ptr->waiting_ports))
- tipc_link_wakeup_ports(l_ptr, 1);
-
+ if (!skb_queue_empty(&l_ptr->waiting_sks)) {
+ skb_queue_splice_init(&l_ptr->waiting_sks, &owner->waiting_sks);
+ owner->action_flags |= TIPC_WAKEUP_USERS;
+ }
l_ptr->retransm_queue_head = 0;
l_ptr->retransm_queue_size = 0;
l_ptr->last_out = NULL;
@@ -676,178 +660,146 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
}
}
-/*
- * link_bundle_buf(): Append contents of a buffer to
- * the tail of an existing one.
+/* tipc_link_cong: determine return value and how to treat the
+ * sent buffer during link congestion.
+ * - For plain, errorless user data messages we keep the buffer and
+ * return -ELINKONG.
+ * - For all other messages we discard the buffer and return -EHOSTUNREACH
+ * - For TIPC internal messages we also reset the link
*/
-static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler,
- struct sk_buff *buf)
+static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf)
{
- struct tipc_msg *bundler_msg = buf_msg(bundler);
struct tipc_msg *msg = buf_msg(buf);
- u32 size = msg_size(msg);
- u32 bundle_size = msg_size(bundler_msg);
- u32 to_pos = align(bundle_size);
- u32 pad = to_pos - bundle_size;
-
- if (msg_user(bundler_msg) != MSG_BUNDLER)
- return 0;
- if (msg_type(bundler_msg) != OPEN_MSG)
- return 0;
- if (skb_tailroom(bundler) < (pad + size))
- return 0;
- if (l_ptr->max_pkt < (to_pos + size))
- return 0;
-
- skb_put(bundler, pad + size);
- skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
- msg_set_size(bundler_msg, to_pos + size);
- msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
- kfree_skb(buf);
- l_ptr->stats.sent_bundled++;
- return 1;
-}
-
-static void link_add_to_outqueue(struct tipc_link *l_ptr,
- struct sk_buff *buf,
- struct tipc_msg *msg)
-{
- u32 ack = mod(l_ptr->next_in_no - 1);
- u32 seqno = mod(l_ptr->next_out_no++);
+ uint imp = tipc_msg_tot_importance(msg);
+ u32 oport = msg_tot_origport(msg);
- msg_set_word(msg, 2, ((ack << 16) | seqno));
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- buf->next = NULL;
- if (l_ptr->first_out) {
- l_ptr->last_out->next = buf;
- l_ptr->last_out = buf;
- } else
- l_ptr->first_out = l_ptr->last_out = buf;
-
- l_ptr->out_queue_size++;
- if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
- l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
-}
-
-static void link_add_chain_to_outqueue(struct tipc_link *l_ptr,
- struct sk_buff *buf_chain,
- u32 long_msgno)
-{
- struct sk_buff *buf;
- struct tipc_msg *msg;
-
- if (!l_ptr->next_out)
- l_ptr->next_out = buf_chain;
- while (buf_chain) {
- buf = buf_chain;
- buf_chain = buf_chain->next;
-
- msg = buf_msg(buf);
- msg_set_long_msgno(msg, long_msgno);
- link_add_to_outqueue(l_ptr, buf, msg);
+ if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) {
+ pr_warn("%s<%s>, send queue full", link_rst_msg, link->name);
+ tipc_link_reset(link);
+ goto drop;
}
+ if (unlikely(msg_errcode(msg)))
+ goto drop;
+ if (unlikely(msg_reroute_cnt(msg)))
+ goto drop;
+ if (TIPC_SKB_CB(buf)->wakeup_pending)
+ return -ELINKCONG;
+ if (link_schedule_user(link, oport, TIPC_SKB_CB(buf)->chain_sz, imp))
+ return -ELINKCONG;
+drop:
+ kfree_skb_list(buf);
+ return -EHOSTUNREACH;
}
-/*
- * tipc_link_xmit() is the 'full path' for messages, called from
- * inside TIPC when the 'fast path' in tipc_send_xmit
- * has failed, and from link_send()
+/**
+ * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked
+ * @link: link to use
+ * @buf: chain of buffers containing message
+ * Consumes the buffer chain, except when returning -ELINKCONG
+ * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket
+ * user data messages) or -EHOSTUNREACH (all other messages/senders)
+ * Only the socket functions tipc_send_stream() and tipc_send_packet() need
+ * to act on the return value, since they may need to do more send attempts.
*/
-int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf)
+int __tipc_link_xmit(struct tipc_link *link, struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
- u32 size = msg_size(msg);
- u32 dsz = msg_data_sz(msg);
- u32 queue_size = l_ptr->out_queue_size;
- u32 imp = tipc_msg_tot_importance(msg);
- u32 queue_limit = l_ptr->queue_limit[imp];
- u32 max_packet = l_ptr->max_pkt;
-
- /* Match msg importance against queue limits: */
- if (unlikely(queue_size >= queue_limit)) {
- if (imp <= TIPC_CRITICAL_IMPORTANCE) {
- link_schedule_port(l_ptr, msg_origport(msg), size);
- kfree_skb(buf);
- return -ELINKCONG;
- }
- kfree_skb(buf);
- if (imp > CONN_MANAGER) {
- pr_warn("%s<%s>, send queue full", link_rst_msg,
- l_ptr->name);
- tipc_link_reset(l_ptr);
- }
- return dsz;
+ uint psz = msg_size(msg);
+ uint qsz = link->out_queue_size;
+ uint sndlim = link->queue_limit[0];
+ uint imp = tipc_msg_tot_importance(msg);
+ uint mtu = link->max_pkt;
+ uint ack = mod(link->next_in_no - 1);
+ uint seqno = link->next_out_no;
+ uint bc_last_in = link->owner->bclink.last_in;
+ struct tipc_media_addr *addr = &link->media_addr;
+ struct sk_buff *next = buf->next;
+
+ /* Match queue limits against msg importance: */
+ if (unlikely(qsz >= link->queue_limit[imp]))
+ return tipc_link_cong(link, buf);
+
+ /* Has valid packet limit been used ? */
+ if (unlikely(psz > mtu)) {
+ kfree_skb_list(buf);
+ return -EMSGSIZE;
}
- /* Fragmentation needed ? */
- if (size > max_packet)
- return tipc_link_frag_xmit(l_ptr, buf);
-
- /* Packet can be queued or sent. */
- if (likely(!link_congested(l_ptr))) {
- link_add_to_outqueue(l_ptr, buf, msg);
+ /* Prepare each packet for sending, and add to outqueue: */
+ while (buf) {
+ next = buf->next;
+ msg = buf_msg(buf);
+ msg_set_word(msg, 2, ((ack << 16) | mod(seqno)));
+ msg_set_bcast_ack(msg, bc_last_in);
+
+ if (!link->first_out) {
+ link->first_out = buf;
+ } else if (qsz < sndlim) {
+ link->last_out->next = buf;
+ } else if (tipc_msg_bundle(link->last_out, buf, mtu)) {
+ link->stats.sent_bundled++;
+ buf = next;
+ next = buf->next;
+ continue;
+ } else if (tipc_msg_make_bundle(&buf, mtu, link->addr)) {
+ link->stats.sent_bundled++;
+ link->stats.sent_bundles++;
+ link->last_out->next = buf;
+ if (!link->next_out)
+ link->next_out = buf;
+ } else {
+ link->last_out->next = buf;
+ if (!link->next_out)
+ link->next_out = buf;
+ }
- tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
- l_ptr->unacked_window = 0;
- return dsz;
- }
- /* Congestion: can message be bundled ? */
- if ((msg_user(msg) != CHANGEOVER_PROTOCOL) &&
- (msg_user(msg) != MSG_FRAGMENTER)) {
-
- /* Try adding message to an existing bundle */
- if (l_ptr->next_out &&
- link_bundle_buf(l_ptr, l_ptr->last_out, buf))
- return dsz;
-
- /* Try creating a new bundle */
- if (size <= max_packet * 2 / 3) {
- struct sk_buff *bundler = tipc_buf_acquire(max_packet);
- struct tipc_msg bundler_hdr;
-
- if (bundler) {
- tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
- INT_H_SIZE, l_ptr->addr);
- skb_copy_to_linear_data(bundler, &bundler_hdr,
- INT_H_SIZE);
- skb_trim(bundler, INT_H_SIZE);
- link_bundle_buf(l_ptr, bundler, buf);
- buf = bundler;
- msg = buf_msg(buf);
- l_ptr->stats.sent_bundles++;
- }
+ /* Send packet if possible: */
+ if (likely(++qsz <= sndlim)) {
+ tipc_bearer_send(link->bearer_id, buf, addr);
+ link->next_out = next;
+ link->unacked_window = 0;
}
+ seqno++;
+ link->last_out = buf;
+ buf = next;
}
- if (!l_ptr->next_out)
- l_ptr->next_out = buf;
- link_add_to_outqueue(l_ptr, buf, msg);
- return dsz;
+ link->next_out_no = seqno;
+ link->out_queue_size = qsz;
+ return 0;
}
-/*
- * tipc_link_xmit(): same as __tipc_link_xmit(), but the link to use
- * has not been selected yet, and the the owner node is not locked
- * Called by TIPC internal users, e.g. the name distributor
+/**
+ * tipc_link_xmit() is the general link level function for message sending
+ * @buf: chain of buffers containing message
+ * @dsz: amount of user data to be sent
+ * @dnode: address of destination node
+ * @selector: a number used for deterministic link selection
+ * Consumes the buffer chain, except when returning -ELINKCONG
+ * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
*/
-int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector)
+int tipc_link_xmit(struct sk_buff *buf, u32 dnode, u32 selector)
{
- struct tipc_link *l_ptr;
- struct tipc_node *n_ptr;
- int res = -ELINKCONG;
+ struct tipc_link *link = NULL;
+ struct tipc_node *node;
+ int rc = -EHOSTUNREACH;
- n_ptr = tipc_node_find(dest);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[selector & 1];
- if (l_ptr)
- res = __tipc_link_xmit(l_ptr, buf);
- else
- kfree_skb(buf);
- tipc_node_unlock(n_ptr);
- } else {
- kfree_skb(buf);
+ node = tipc_node_find(dnode);
+ if (node) {
+ tipc_node_lock(node);
+ link = node->active_links[selector & 1];
+ if (link)
+ rc = __tipc_link_xmit(link, buf);
+ tipc_node_unlock(node);
}
- return res;
+
+ if (link)
+ return rc;
+
+ if (likely(in_own_node(dnode)))
+ return tipc_sk_rcv(buf);
+
+ kfree_skb_list(buf);
+ return rc;
}
/*
@@ -858,7 +810,7 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector)
*
* Called with node locked
*/
-static void tipc_link_sync_xmit(struct tipc_link *l)
+static void tipc_link_sync_xmit(struct tipc_link *link)
{
struct sk_buff *buf;
struct tipc_msg *msg;
@@ -868,10 +820,9 @@ static void tipc_link_sync_xmit(struct tipc_link *l)
return;
msg = buf_msg(buf);
- tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, l->addr);
- msg_set_last_bcast(msg, l->owner->bclink.acked);
- link_add_chain_to_outqueue(l, buf, 0);
- tipc_link_push_queue(l);
+ tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr);
+ msg_set_last_bcast(msg, link->owner->bclink.acked);
+ __tipc_link_xmit(link, buf);
}
/*
@@ -892,293 +843,6 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf)
}
/*
- * tipc_link_names_xmit - send name table entries to new neighbor
- *
- * Send routine for bulk delivery of name table messages when contact
- * with a new neighbor occurs. No link congestion checking is performed
- * because name table messages *must* be delivered. The messages must be
- * small enough not to require fragmentation.
- * Called without any locks held.
- */
-void tipc_link_names_xmit(struct list_head *message_list, u32 dest)
-{
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- struct sk_buff *buf;
- struct sk_buff *temp_buf;
-
- if (list_empty(message_list))
- return;
-
- n_ptr = tipc_node_find(dest);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[0];
- if (l_ptr) {
- /* convert circular list to linear list */
- ((struct sk_buff *)message_list->prev)->next = NULL;
- link_add_chain_to_outqueue(l_ptr,
- (struct sk_buff *)message_list->next, 0);
- tipc_link_push_queue(l_ptr);
- INIT_LIST_HEAD(message_list);
- }
- tipc_node_unlock(n_ptr);
- }
-
- /* discard the messages if they couldn't be sent */
- list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) {
- list_del((struct list_head *)buf);
- kfree_skb(buf);
- }
-}
-
-/*
- * tipc_link_xmit_fast: Entry for data messages where the
- * destination link is known and the header is complete,
- * inclusive total message length. Very time critical.
- * Link is locked. Returns user data length.
- */
-static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
- u32 *used_max_pkt)
-{
- struct tipc_msg *msg = buf_msg(buf);
- int res = msg_data_sz(msg);
-
- if (likely(!link_congested(l_ptr))) {
- if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
- link_add_to_outqueue(l_ptr, buf, msg);
- tipc_bearer_send(l_ptr->bearer_id, buf,
- &l_ptr->media_addr);
- l_ptr->unacked_window = 0;
- return res;
- }
- else
- *used_max_pkt = l_ptr->max_pkt;
- }
- return __tipc_link_xmit(l_ptr, buf); /* All other cases */
-}
-
-/*
- * tipc_link_iovec_xmit_fast: Entry for messages where the
- * destination processor is known and the header is complete,
- * except for total message length.
- * Returns user data length or errno.
- */
-int tipc_link_iovec_xmit_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destaddr)
-{
- struct tipc_msg *hdr = &sender->phdr;
- struct tipc_link *l_ptr;
- struct sk_buff *buf;
- struct tipc_node *node;
- int res;
- u32 selector = msg_origport(hdr) & 1;
-
-again:
- /*
- * Try building message using port's max_pkt hint.
- * (Must not hold any locks while building message.)
- */
- res = tipc_msg_build(hdr, msg_sect, len, sender->max_pkt, &buf);
- /* Exit if build request was invalid */
- if (unlikely(res < 0))
- return res;
-
- node = tipc_node_find(destaddr);
- if (likely(node)) {
- tipc_node_lock(node);
- l_ptr = node->active_links[selector];
- if (likely(l_ptr)) {
- if (likely(buf)) {
- res = tipc_link_xmit_fast(l_ptr, buf,
- &sender->max_pkt);
-exit:
- tipc_node_unlock(node);
- return res;
- }
-
- /* Exit if link (or bearer) is congested */
- if (link_congested(l_ptr)) {
- res = link_schedule_port(l_ptr,
- sender->ref, res);
- goto exit;
- }
-
- /*
- * Message size exceeds max_pkt hint; update hint,
- * then re-try fast path or fragment the message
- */
- sender->max_pkt = l_ptr->max_pkt;
- tipc_node_unlock(node);
-
-
- if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
- goto again;
-
- return tipc_link_iovec_long_xmit(sender, msg_sect,
- len, destaddr);
- }
- tipc_node_unlock(node);
- }
-
- /* Couldn't find a link to the destination node */
- kfree_skb(buf);
- tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE);
- return -ENETUNREACH;
-}
-
-/*
- * tipc_link_iovec_long_xmit(): Entry for long messages where the
- * destination node is known and the header is complete,
- * inclusive total message length.
- * Link and bearer congestion status have been checked to be ok,
- * and are ignored if they change.
- *
- * Note that fragments do not use the full link MTU so that they won't have
- * to undergo refragmentation if link changeover causes them to be sent
- * over another link with an additional tunnel header added as prefix.
- * (Refragmentation will still occur if the other link has a smaller MTU.)
- *
- * Returns user data length or errno.
- */
-static int tipc_link_iovec_long_xmit(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destaddr)
-{
- struct tipc_link *l_ptr;
- struct tipc_node *node;
- struct tipc_msg *hdr = &sender->phdr;
- u32 dsz = len;
- u32 max_pkt, fragm_sz, rest;
- struct tipc_msg fragm_hdr;
- struct sk_buff *buf, *buf_chain, *prev;
- u32 fragm_crs, fragm_rest, hsz, sect_rest;
- const unchar __user *sect_crs;
- int curr_sect;
- u32 fragm_no;
- int res = 0;
-
-again:
- fragm_no = 1;
- max_pkt = sender->max_pkt - INT_H_SIZE;
- /* leave room for tunnel header in case of link changeover */
- fragm_sz = max_pkt - INT_H_SIZE;
- /* leave room for fragmentation header in each fragment */
- rest = dsz;
- fragm_crs = 0;
- fragm_rest = 0;
- sect_rest = 0;
- sect_crs = NULL;
- curr_sect = -1;
-
- /* Prepare reusable fragment header */
- tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
- INT_H_SIZE, msg_destnode(hdr));
- msg_set_size(&fragm_hdr, max_pkt);
- msg_set_fragm_no(&fragm_hdr, 1);
-
- /* Prepare header of first fragment */
- buf_chain = buf = tipc_buf_acquire(max_pkt);
- if (!buf)
- return -ENOMEM;
- buf->next = NULL;
- skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
- hsz = msg_hdr_sz(hdr);
- skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
-
- /* Chop up message */
- fragm_crs = INT_H_SIZE + hsz;
- fragm_rest = fragm_sz - hsz;
-
- do { /* For all sections */
- u32 sz;
-
- if (!sect_rest) {
- sect_rest = msg_sect[++curr_sect].iov_len;
- sect_crs = msg_sect[curr_sect].iov_base;
- }
-
- if (sect_rest < fragm_rest)
- sz = sect_rest;
- else
- sz = fragm_rest;
-
- if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) {
- res = -EFAULT;
-error:
- kfree_skb_list(buf_chain);
- return res;
- }
- sect_crs += sz;
- sect_rest -= sz;
- fragm_crs += sz;
- fragm_rest -= sz;
- rest -= sz;
-
- if (!fragm_rest && rest) {
-
- /* Initiate new fragment: */
- if (rest <= fragm_sz) {
- fragm_sz = rest;
- msg_set_type(&fragm_hdr, LAST_FRAGMENT);
- } else {
- msg_set_type(&fragm_hdr, FRAGMENT);
- }
- msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
- msg_set_fragm_no(&fragm_hdr, ++fragm_no);
- prev = buf;
- buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
- if (!buf) {
- res = -ENOMEM;
- goto error;
- }
-
- buf->next = NULL;
- prev->next = buf;
- skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
- fragm_crs = INT_H_SIZE;
- fragm_rest = fragm_sz;
- }
- } while (rest > 0);
-
- /*
- * Now we have a buffer chain. Select a link and check
- * that packet size is still OK
- */
- node = tipc_node_find(destaddr);
- if (likely(node)) {
- tipc_node_lock(node);
- l_ptr = node->active_links[sender->ref & 1];
- if (!l_ptr) {
- tipc_node_unlock(node);
- goto reject;
- }
- if (l_ptr->max_pkt < max_pkt) {
- sender->max_pkt = l_ptr->max_pkt;
- tipc_node_unlock(node);
- kfree_skb_list(buf_chain);
- goto again;
- }
- } else {
-reject:
- kfree_skb_list(buf_chain);
- tipc_port_iovec_reject(sender, hdr, msg_sect, len,
- TIPC_ERR_NO_NODE);
- return -ENETUNREACH;
- }
-
- /* Append chain of fragments to send queue & send them */
- l_ptr->long_msg_seq_no++;
- link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no);
- l_ptr->stats.sent_fragments += fragm_no;
- l_ptr->stats.sent_fragmented++;
- tipc_link_push_queue(l_ptr);
- tipc_node_unlock(node);
- return dsz;
-}
-
-/*
* tipc_link_push_packet: Push one unsent packet to the media
*/
static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
@@ -1238,7 +902,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
tipc_bearer_send(l_ptr->bearer_id, buf,
&l_ptr->media_addr);
if (msg_user(msg) == MSG_BUNDLER)
- msg_set_type(msg, CLOSED_MSG);
+ msg_set_type(msg, BUNDLE_CLOSED);
l_ptr->next_out = buf->next;
return 0;
}
@@ -1524,12 +1188,9 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
if (unlikely(l_ptr->next_out))
tipc_link_push_queue(l_ptr);
- if (unlikely(!list_empty(&l_ptr->waiting_ports)))
- tipc_link_wakeup_ports(l_ptr, 0);
-
- if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) {
- l_ptr->stats.sent_acks++;
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+ if (released && !skb_queue_empty(&l_ptr->waiting_sks)) {
+ link_prepare_wakeup(l_ptr);
+ l_ptr->owner->action_flags |= TIPC_WAKEUP_USERS;
}
/* Process the incoming packet */
@@ -1565,57 +1226,19 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
if (unlikely(l_ptr->oldest_deferred_in))
head = link_insert_deferred_queue(l_ptr, head);
- /* Deliver packet/message to correct user: */
- if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) {
- if (!tipc_link_tunnel_rcv(n_ptr, &buf)) {
- tipc_node_unlock(n_ptr);
- continue;
- }
- msg = buf_msg(buf);
- } else if (msg_user(msg) == MSG_FRAGMENTER) {
- l_ptr->stats.recv_fragments++;
- if (tipc_buf_append(&l_ptr->reasm_buf, &buf)) {
- l_ptr->stats.recv_fragmented++;
- msg = buf_msg(buf);
- } else {
- if (!l_ptr->reasm_buf)
- tipc_link_reset(l_ptr);
- tipc_node_unlock(n_ptr);
- continue;
- }
+ if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) {
+ l_ptr->stats.sent_acks++;
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
}
- switch (msg_user(msg)) {
- case TIPC_LOW_IMPORTANCE:
- case TIPC_MEDIUM_IMPORTANCE:
- case TIPC_HIGH_IMPORTANCE:
- case TIPC_CRITICAL_IMPORTANCE:
- tipc_node_unlock(n_ptr);
- tipc_sk_rcv(buf);
- continue;
- case MSG_BUNDLER:
- l_ptr->stats.recv_bundles++;
- l_ptr->stats.recv_bundled += msg_msgcnt(msg);
- tipc_node_unlock(n_ptr);
- tipc_link_bundle_rcv(buf);
- continue;
- case NAME_DISTRIBUTOR:
- n_ptr->bclink.recv_permitted = true;
- tipc_node_unlock(n_ptr);
- tipc_named_rcv(buf);
- continue;
- case CONN_MANAGER:
+ if (tipc_link_prepare_input(l_ptr, &buf)) {
tipc_node_unlock(n_ptr);
- tipc_port_proto_rcv(buf);
continue;
- case BCAST_PROTOCOL:
- tipc_link_sync_rcv(n_ptr, buf);
- break;
- default:
- kfree_skb(buf);
- break;
}
tipc_node_unlock(n_ptr);
+ msg = buf_msg(buf);
+ if (tipc_link_input(l_ptr, buf) != 0)
+ goto discard;
continue;
unlock_discard:
tipc_node_unlock(n_ptr);
@@ -1625,6 +1248,80 @@ discard:
}
/**
+ * tipc_link_prepare_input - process TIPC link messages
+ *
+ * returns nonzero if the message was consumed
+ *
+ * Node lock must be held
+ */
+static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf)
+{
+ struct tipc_node *n;
+ struct tipc_msg *msg;
+ int res = -EINVAL;
+
+ n = l->owner;
+ msg = buf_msg(*buf);
+ switch (msg_user(msg)) {
+ case CHANGEOVER_PROTOCOL:
+ if (tipc_link_tunnel_rcv(n, buf))
+ res = 0;
+ break;
+ case MSG_FRAGMENTER:
+ l->stats.recv_fragments++;
+ if (tipc_buf_append(&l->reasm_buf, buf)) {
+ l->stats.recv_fragmented++;
+ res = 0;
+ } else if (!l->reasm_buf) {
+ tipc_link_reset(l);
+ }
+ break;
+ case MSG_BUNDLER:
+ l->stats.recv_bundles++;
+ l->stats.recv_bundled += msg_msgcnt(msg);
+ res = 0;
+ break;
+ case NAME_DISTRIBUTOR:
+ n->bclink.recv_permitted = true;
+ res = 0;
+ break;
+ case BCAST_PROTOCOL:
+ tipc_link_sync_rcv(n, *buf);
+ break;
+ default:
+ res = 0;
+ }
+ return res;
+}
+/**
+ * tipc_link_input - Deliver message too higher layers
+ */
+static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf)
+{
+ struct tipc_msg *msg = buf_msg(buf);
+ int res = 0;
+
+ switch (msg_user(msg)) {
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ case CONN_MANAGER:
+ tipc_sk_rcv(buf);
+ break;
+ case NAME_DISTRIBUTOR:
+ tipc_named_rcv(buf);
+ break;
+ case MSG_BUNDLER:
+ tipc_link_bundle_rcv(buf);
+ break;
+ default:
+ res = -EINVAL;
+ }
+ return res;
+}
+
+/**
* tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue
*
* Returns increase in queue length (i.e. 0 or 1)
@@ -2217,6 +1914,7 @@ void tipc_link_bundle_rcv(struct sk_buff *buf)
u32 msgcount = msg_msgcnt(buf_msg(buf));
u32 pos = INT_H_SIZE;
struct sk_buff *obuf;
+ struct tipc_msg *omsg;
while (msgcount--) {
obuf = buf_extract(buf, pos);
@@ -2224,82 +1922,18 @@ void tipc_link_bundle_rcv(struct sk_buff *buf)
pr_warn("Link unable to unbundle message(s)\n");
break;
}
- pos += align(msg_size(buf_msg(obuf)));
- tipc_net_route_msg(obuf);
- }
- kfree_skb(buf);
-}
-
-/*
- * Fragmentation/defragmentation:
- */
-
-/*
- * tipc_link_frag_xmit: Entry for buffers needing fragmentation.
- * The buffer is complete, inclusive total message length.
- * Returns user data length.
- */
-static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf)
-{
- struct sk_buff *buf_chain = NULL;
- struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain;
- struct tipc_msg *inmsg = buf_msg(buf);
- struct tipc_msg fragm_hdr;
- u32 insize = msg_size(inmsg);
- u32 dsz = msg_data_sz(inmsg);
- unchar *crs = buf->data;
- u32 rest = insize;
- u32 pack_sz = l_ptr->max_pkt;
- u32 fragm_sz = pack_sz - INT_H_SIZE;
- u32 fragm_no = 0;
- u32 destaddr;
-
- if (msg_short(inmsg))
- destaddr = l_ptr->addr;
- else
- destaddr = msg_destnode(inmsg);
-
- /* Prepare reusable fragment header: */
- tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
- INT_H_SIZE, destaddr);
-
- /* Chop up message: */
- while (rest > 0) {
- struct sk_buff *fragm;
-
- if (rest <= fragm_sz) {
- fragm_sz = rest;
- msg_set_type(&fragm_hdr, LAST_FRAGMENT);
- }
- fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
- if (fragm == NULL) {
- kfree_skb(buf);
- kfree_skb_list(buf_chain);
- return -ENOMEM;
+ omsg = buf_msg(obuf);
+ pos += align(msg_size(omsg));
+ if (msg_isdata(omsg) || (msg_user(omsg) == CONN_MANAGER)) {
+ tipc_sk_rcv(obuf);
+ } else if (msg_user(omsg) == NAME_DISTRIBUTOR) {
+ tipc_named_rcv(obuf);
+ } else {
+ pr_warn("Illegal bundled msg: %u\n", msg_user(omsg));
+ kfree_skb(obuf);
}
- msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
- fragm_no++;
- msg_set_fragm_no(&fragm_hdr, fragm_no);
- skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
- skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
- fragm_sz);
- buf_chain_tail->next = fragm;
- buf_chain_tail = fragm;
-
- rest -= fragm_sz;
- crs += fragm_sz;
- msg_set_type(&fragm_hdr, FRAGMENT);
}
kfree_skb(buf);
-
- /* Append chain of fragments to send queue & send them */
- l_ptr->long_msg_seq_no++;
- link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no);
- l_ptr->stats.sent_fragments += fragm_no;
- l_ptr->stats.sent_fragmented++;
- tipc_link_push_queue(l_ptr);
-
- return dsz;
}
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance)
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 200d518b218e..b567a3427fda 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -1,7 +1,7 @@
/*
* net/tipc/link.h: Include file for TIPC link code
*
- * Copyright (c) 1995-2006, 2013, Ericsson AB
+ * Copyright (c) 1995-2006, 2013-2014, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -133,7 +133,7 @@ struct tipc_stats {
* @retransm_queue_size: number of messages to retransmit
* @retransm_queue_head: sequence number of first message to retransmit
* @next_out: ptr to first unsent outbound message in queue
- * @waiting_ports: linked list of ports waiting for link congestion to abate
+ * @waiting_sks: linked list of sockets waiting for link congestion to abate
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
* @reasm_buf: head of partially reassembled inbound message fragments
* @stats: collects statistics regarding link activity
@@ -194,7 +194,7 @@ struct tipc_link {
u32 retransm_queue_size;
u32 retransm_queue_head;
struct sk_buff *next_out;
- struct list_head waiting_ports;
+ struct sk_buff_head waiting_sks;
/* Fragmentation/reassembly */
u32 long_msg_seq_no;
@@ -227,20 +227,14 @@ void tipc_link_reset_all(struct tipc_node *node);
void tipc_link_reset(struct tipc_link *l_ptr);
void tipc_link_reset_list(unsigned int bearer_id);
int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector);
-void tipc_link_names_xmit(struct list_head *message_list, u32 dest);
-int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf);
-int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
+int __tipc_link_xmit(struct tipc_link *link, struct sk_buff *buf);
u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
-int tipc_link_iovec_xmit_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destnode);
void tipc_link_bundle_rcv(struct sk_buff *buf);
void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
u32 gap, u32 tolerance, u32 priority, u32 acked_mtu);
void tipc_link_push_queue(struct tipc_link *l_ptr);
u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
struct sk_buff *buf);
-void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all);
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
void tipc_link_retransmit(struct tipc_link *l_ptr,
struct sk_buff *start, u32 retransmits);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 0a37a472c29f..74745a47d72a 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -36,21 +36,16 @@
#include "core.h"
#include "msg.h"
+#include "addr.h"
+#include "name_table.h"
-u32 tipc_msg_tot_importance(struct tipc_msg *m)
+#define MAX_FORWARD_SIZE 1024
+
+static unsigned int align(unsigned int i)
{
- if (likely(msg_isdata(m))) {
- if (likely(msg_orignode(m) == tipc_own_addr))
- return msg_importance(m);
- return msg_importance(m) + 4;
- }
- if ((msg_user(m) == MSG_FRAGMENTER) &&
- (msg_type(m) == FIRST_FRAGMENT))
- return msg_importance(msg_get_wrapped(m));
- return msg_importance(m);
+ return (i + 3) & ~3u;
}
-
void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
u32 destnode)
{
@@ -61,43 +56,35 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
msg_set_size(m, hsize);
msg_set_prevnode(m, tipc_own_addr);
msg_set_type(m, type);
- msg_set_orignode(m, tipc_own_addr);
- msg_set_destnode(m, destnode);
+ if (hsize > SHORT_H_SIZE) {
+ msg_set_orignode(m, tipc_own_addr);
+ msg_set_destnode(m, destnode);
+ }
}
-/**
- * tipc_msg_build - create message using specified header and data
- *
- * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
- *
- * Returns message data size or errno
- */
-int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
- unsigned int len, int max_size, struct sk_buff **buf)
+struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
+ uint data_sz, u32 dnode, u32 onode,
+ u32 dport, u32 oport, int errcode)
{
- int dsz, sz, hsz;
- unsigned char *to;
-
- dsz = len;
- hsz = msg_hdr_sz(hdr);
- sz = hsz + dsz;
- msg_set_size(hdr, sz);
- if (unlikely(sz > max_size)) {
- *buf = NULL;
- return dsz;
- }
+ struct tipc_msg *msg;
+ struct sk_buff *buf;
- *buf = tipc_buf_acquire(sz);
- if (!(*buf))
- return -ENOMEM;
- skb_copy_to_linear_data(*buf, hdr, hsz);
- to = (*buf)->data + hsz;
- if (len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) {
- kfree_skb(*buf);
- *buf = NULL;
- return -EFAULT;
+ buf = tipc_buf_acquire(hdr_sz + data_sz);
+ if (unlikely(!buf))
+ return NULL;
+
+ msg = buf_msg(buf);
+ tipc_msg_init(msg, user, type, hdr_sz, dnode);
+ msg_set_size(msg, hdr_sz + data_sz);
+ msg_set_prevnode(msg, onode);
+ msg_set_origport(msg, oport);
+ msg_set_destport(msg, dport);
+ msg_set_errcode(msg, errcode);
+ if (hdr_sz > SHORT_H_SIZE) {
+ msg_set_orignode(msg, onode);
+ msg_set_destnode(msg, dnode);
}
- return dsz;
+ return buf;
}
/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
@@ -112,27 +99,38 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
struct sk_buff *head = *headbuf;
struct sk_buff *frag = *buf;
struct sk_buff *tail;
- struct tipc_msg *msg = buf_msg(frag);
- u32 fragid = msg_type(msg);
- bool headstolen;
+ struct tipc_msg *msg;
+ u32 fragid;
int delta;
+ bool headstolen;
+
+ if (!frag)
+ goto err;
+ msg = buf_msg(frag);
+ fragid = msg_type(msg);
+ frag->next = NULL;
skb_pull(frag, msg_hdr_sz(msg));
if (fragid == FIRST_FRAGMENT) {
- if (head || skb_unclone(frag, GFP_ATOMIC))
- goto out_free;
+ if (unlikely(head))
+ goto err;
+ if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
+ goto err;
head = *headbuf = frag;
skb_frag_list_init(head);
+ TIPC_SKB_CB(head)->tail = NULL;
*buf = NULL;
return 0;
}
+
if (!head)
- goto out_free;
- tail = TIPC_SKB_CB(head)->tail;
+ goto err;
+
if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
kfree_skb_partial(frag, headstolen);
} else {
+ tail = TIPC_SKB_CB(head)->tail;
if (!skb_has_frag_list(head))
skb_shinfo(head)->frag_list = frag;
else
@@ -142,6 +140,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
head->len += frag->len;
TIPC_SKB_CB(head)->tail = frag;
}
+
if (fragid == LAST_FRAGMENT) {
*buf = head;
TIPC_SKB_CB(head)->tail = NULL;
@@ -150,10 +149,314 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
}
*buf = NULL;
return 0;
-out_free:
+
+err:
pr_warn_ratelimited("Unable to build fragment list\n");
kfree_skb(*buf);
kfree_skb(*headbuf);
*buf = *headbuf = NULL;
return 0;
}
+
+
+/**
+ * tipc_msg_build - create buffer chain containing specified header and data
+ * @mhdr: Message header, to be prepended to data
+ * @iov: User data
+ * @offset: Posision in iov to start copying from
+ * @dsz: Total length of user data
+ * @pktmax: Max packet size that can be used
+ * @chain: Buffer or chain of buffers to be returned to caller
+ * Returns message data size or errno: -ENOMEM, -EFAULT
+ */
+int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov,
+ int offset, int dsz, int pktmax , struct sk_buff **chain)
+{
+ int mhsz = msg_hdr_sz(mhdr);
+ int msz = mhsz + dsz;
+ int pktno = 1;
+ int pktsz;
+ int pktrem = pktmax;
+ int drem = dsz;
+ struct tipc_msg pkthdr;
+ struct sk_buff *buf, *prev;
+ char *pktpos;
+ int rc;
+ uint chain_sz = 0;
+ msg_set_size(mhdr, msz);
+
+ /* No fragmentation needed? */
+ if (likely(msz <= pktmax)) {
+ buf = tipc_buf_acquire(msz);
+ *chain = buf;
+ if (unlikely(!buf))
+ return -ENOMEM;
+ skb_copy_to_linear_data(buf, mhdr, mhsz);
+ pktpos = buf->data + mhsz;
+ TIPC_SKB_CB(buf)->chain_sz = 1;
+ if (!dsz || !memcpy_fromiovecend(pktpos, iov, offset, dsz))
+ return dsz;
+ rc = -EFAULT;
+ goto error;
+ }
+
+ /* Prepare reusable fragment header */
+ tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
+ INT_H_SIZE, msg_destnode(mhdr));
+ msg_set_size(&pkthdr, pktmax);
+ msg_set_fragm_no(&pkthdr, pktno);
+
+ /* Prepare first fragment */
+ *chain = buf = tipc_buf_acquire(pktmax);
+ if (!buf)
+ return -ENOMEM;
+ chain_sz = 1;
+ pktpos = buf->data;
+ skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE);
+ pktpos += INT_H_SIZE;
+ pktrem -= INT_H_SIZE;
+ skb_copy_to_linear_data_offset(buf, INT_H_SIZE, mhdr, mhsz);
+ pktpos += mhsz;
+ pktrem -= mhsz;
+
+ do {
+ if (drem < pktrem)
+ pktrem = drem;
+
+ if (memcpy_fromiovecend(pktpos, iov, offset, pktrem)) {
+ rc = -EFAULT;
+ goto error;
+ }
+ drem -= pktrem;
+ offset += pktrem;
+
+ if (!drem)
+ break;
+
+ /* Prepare new fragment: */
+ if (drem < (pktmax - INT_H_SIZE))
+ pktsz = drem + INT_H_SIZE;
+ else
+ pktsz = pktmax;
+ prev = buf;
+ buf = tipc_buf_acquire(pktsz);
+ if (!buf) {
+ rc = -ENOMEM;
+ goto error;
+ }
+ chain_sz++;
+ prev->next = buf;
+ msg_set_type(&pkthdr, FRAGMENT);
+ msg_set_size(&pkthdr, pktsz);
+ msg_set_fragm_no(&pkthdr, ++pktno);
+ skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE);
+ pktpos = buf->data + INT_H_SIZE;
+ pktrem = pktsz - INT_H_SIZE;
+
+ } while (1);
+ TIPC_SKB_CB(*chain)->chain_sz = chain_sz;
+ msg_set_type(buf_msg(buf), LAST_FRAGMENT);
+ return dsz;
+error:
+ kfree_skb_list(*chain);
+ *chain = NULL;
+ return rc;
+}
+
+/**
+ * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
+ * @bbuf: the existing buffer ("bundle")
+ * @buf: buffer to be appended
+ * @mtu: max allowable size for the bundle buffer
+ * Consumes buffer if successful
+ * Returns true if bundling could be performed, otherwise false
+ */
+bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu)
+{
+ struct tipc_msg *bmsg = buf_msg(bbuf);
+ struct tipc_msg *msg = buf_msg(buf);
+ unsigned int bsz = msg_size(bmsg);
+ unsigned int msz = msg_size(msg);
+ u32 start = align(bsz);
+ u32 max = mtu - INT_H_SIZE;
+ u32 pad = start - bsz;
+
+ if (likely(msg_user(msg) == MSG_FRAGMENTER))
+ return false;
+ if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL))
+ return false;
+ if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
+ return false;
+ if (likely(msg_user(bmsg) != MSG_BUNDLER))
+ return false;
+ if (likely(msg_type(bmsg) != BUNDLE_OPEN))
+ return false;
+ if (unlikely(skb_tailroom(bbuf) < (pad + msz)))
+ return false;
+ if (unlikely(max < (start + msz)))
+ return false;
+
+ skb_put(bbuf, pad + msz);
+ skb_copy_to_linear_data_offset(bbuf, start, buf->data, msz);
+ msg_set_size(bmsg, start + msz);
+ msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
+ bbuf->next = buf->next;
+ kfree_skb(buf);
+ return true;
+}
+
+/**
+ * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
+ * @buf: buffer to be appended and replaced
+ * @mtu: max allowable size for the bundle buffer, inclusive header
+ * @dnode: destination node for message. (Not always present in header)
+ * Replaces buffer if successful
+ * Returns true if sucess, otherwise false
+ */
+bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode)
+{
+ struct sk_buff *bbuf;
+ struct tipc_msg *bmsg;
+ struct tipc_msg *msg = buf_msg(*buf);
+ u32 msz = msg_size(msg);
+ u32 max = mtu - INT_H_SIZE;
+
+ if (msg_user(msg) == MSG_FRAGMENTER)
+ return false;
+ if (msg_user(msg) == CHANGEOVER_PROTOCOL)
+ return false;
+ if (msg_user(msg) == BCAST_PROTOCOL)
+ return false;
+ if (msz > (max / 2))
+ return false;
+
+ bbuf = tipc_buf_acquire(max);
+ if (!bbuf)
+ return false;
+
+ skb_trim(bbuf, INT_H_SIZE);
+ bmsg = buf_msg(bbuf);
+ tipc_msg_init(bmsg, MSG_BUNDLER, BUNDLE_OPEN, INT_H_SIZE, dnode);
+ msg_set_seqno(bmsg, msg_seqno(msg));
+ msg_set_ack(bmsg, msg_ack(msg));
+ msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
+ bbuf->next = (*buf)->next;
+ tipc_msg_bundle(bbuf, *buf, mtu);
+ *buf = bbuf;
+ return true;
+}
+
+/**
+ * tipc_msg_reverse(): swap source and destination addresses and add error code
+ * @buf: buffer containing message to be reversed
+ * @dnode: return value: node where to send message after reversal
+ * @err: error code to be set in message
+ * Consumes buffer if failure
+ * Returns true if success, otherwise false
+ */
+bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err)
+{
+ struct tipc_msg *msg = buf_msg(buf);
+ uint imp = msg_importance(msg);
+ struct tipc_msg ohdr;
+ uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE);
+
+ if (skb_linearize(buf))
+ goto exit;
+ if (msg_dest_droppable(msg))
+ goto exit;
+ if (msg_errcode(msg))
+ goto exit;
+
+ memcpy(&ohdr, msg, msg_hdr_sz(msg));
+ imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE);
+ if (msg_isdata(msg))
+ msg_set_importance(msg, imp);
+ msg_set_errcode(msg, err);
+ msg_set_origport(msg, msg_destport(&ohdr));
+ msg_set_destport(msg, msg_origport(&ohdr));
+ msg_set_prevnode(msg, tipc_own_addr);
+ if (!msg_short(msg)) {
+ msg_set_orignode(msg, msg_destnode(&ohdr));
+ msg_set_destnode(msg, msg_orignode(&ohdr));
+ }
+ msg_set_size(msg, msg_hdr_sz(msg) + rdsz);
+ skb_trim(buf, msg_size(msg));
+ skb_orphan(buf);
+ *dnode = msg_orignode(&ohdr);
+ return true;
+exit:
+ kfree_skb(buf);
+ return false;
+}
+
+/**
+ * tipc_msg_eval: determine fate of message that found no destination
+ * @buf: the buffer containing the message.
+ * @dnode: return value: next-hop node, if message to be forwarded
+ * @err: error code to use, if message to be rejected
+ *
+ * Does not consume buffer
+ * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error
+ * code if message to be rejected
+ */
+int tipc_msg_eval(struct sk_buff *buf, u32 *dnode)
+{
+ struct tipc_msg *msg = buf_msg(buf);
+ u32 dport;
+
+ if (msg_type(msg) != TIPC_NAMED_MSG)
+ return -TIPC_ERR_NO_PORT;
+ if (skb_linearize(buf))
+ return -TIPC_ERR_NO_NAME;
+ if (msg_data_sz(msg) > MAX_FORWARD_SIZE)
+ return -TIPC_ERR_NO_NAME;
+ if (msg_reroute_cnt(msg) > 0)
+ return -TIPC_ERR_NO_NAME;
+
+ *dnode = addr_domain(msg_lookup_scope(msg));
+ dport = tipc_nametbl_translate(msg_nametype(msg),
+ msg_nameinst(msg),
+ dnode);
+ if (!dport)
+ return -TIPC_ERR_NO_NAME;
+ msg_incr_reroute_cnt(msg);
+ msg_set_destnode(msg, *dnode);
+ msg_set_destport(msg, dport);
+ return TIPC_OK;
+}
+
+/* tipc_msg_reassemble() - clone a buffer chain of fragments and
+ * reassemble the clones into one message
+ */
+struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain)
+{
+ struct sk_buff *buf = chain;
+ struct sk_buff *frag = buf;
+ struct sk_buff *head = NULL;
+ int hdr_sz;
+
+ /* Copy header if single buffer */
+ if (!buf->next) {
+ hdr_sz = skb_headroom(buf) + msg_hdr_sz(buf_msg(buf));
+ return __pskb_copy(buf, hdr_sz, GFP_ATOMIC);
+ }
+
+ /* Clone all fragments and reassemble */
+ while (buf) {
+ frag = skb_clone(buf, GFP_ATOMIC);
+ if (!frag)
+ goto error;
+ frag->next = NULL;
+ if (tipc_buf_append(&head, &frag))
+ break;
+ if (!head)
+ goto error;
+ buf = buf->next;
+ }
+ return frag;
+error:
+ pr_warn("Failed do clone local mcast rcv buffer\n");
+ kfree_skb(head);
+ return NULL;
+}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 503511903d1d..0ea7b695ac4d 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -442,6 +442,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
#define NAME_DISTRIBUTOR 11
#define MSG_FRAGMENTER 12
#define LINK_CONFIG 13
+#define SOCK_WAKEUP 14 /* pseudo user */
/*
* Connection management protocol message types
@@ -463,6 +464,11 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
#define FRAGMENT 1
#define LAST_FRAGMENT 2
+/* Bundling protocol message types
+ */
+#define BUNDLE_OPEN 0
+#define BUNDLE_CLOSED 1
+
/*
* Link management protocol message types
*/
@@ -706,12 +712,40 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 0, 0xffff, n);
}
-u32 tipc_msg_tot_importance(struct tipc_msg *m);
+static inline u32 tipc_msg_tot_importance(struct tipc_msg *m)
+{
+ if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT))
+ return msg_importance(msg_get_wrapped(m));
+ return msg_importance(m);
+}
+
+static inline u32 msg_tot_origport(struct tipc_msg *m)
+{
+ if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT))
+ return msg_origport(msg_get_wrapped(m));
+ return msg_origport(m);
+}
+
+bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err);
+
+int tipc_msg_eval(struct sk_buff *buf, u32 *dnode);
+
void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
u32 destnode);
-int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
- unsigned int len, int max_size, struct sk_buff **buf);
+
+struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
+ uint data_sz, u32 dnode, u32 onode,
+ u32 dport, u32 oport, int errcode);
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
+bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu);
+
+bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode);
+
+int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov,
+ int offset, int dsz, int mtu , struct sk_buff **chain);
+
+struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain);
+
#endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 8ce730984aa1..376d2bb51d8d 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -1,7 +1,7 @@
/*
* net/tipc/name_distr.c: TIPC name distribution code
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2014, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -71,6 +71,21 @@ static struct publ_list *publ_lists[] = {
};
+int sysctl_tipc_named_timeout __read_mostly = 2000;
+
+/**
+ * struct tipc_dist_queue - queue holding deferred name table updates
+ */
+static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue);
+
+struct distr_queue_item {
+ struct distr_item i;
+ u32 dtype;
+ u32 node;
+ unsigned long expires;
+ struct list_head next;
+};
+
/**
* publ_to_item - add publication info to a publication message
*/
@@ -101,24 +116,22 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
void named_cluster_distribute(struct sk_buff *buf)
{
- struct sk_buff *buf_copy;
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
+ struct sk_buff *obuf;
+ struct tipc_node *node;
+ u32 dnode;
rcu_read_lock();
- list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[n_ptr->addr & 1];
- if (l_ptr) {
- buf_copy = skb_copy(buf, GFP_ATOMIC);
- if (!buf_copy) {
- tipc_node_unlock(n_ptr);
- break;
- }
- msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
- __tipc_link_xmit(l_ptr, buf_copy);
- }
- tipc_node_unlock(n_ptr);
+ list_for_each_entry_rcu(node, &tipc_node_list, list) {
+ dnode = node->addr;
+ if (in_own_node(dnode))
+ continue;
+ if (!tipc_node_active_links(node))
+ continue;
+ obuf = skb_copy(buf, GFP_ATOMIC);
+ if (!obuf)
+ break;
+ msg_set_destnode(buf_msg(obuf), dnode);
+ tipc_link_xmit(obuf, dnode, dnode);
}
rcu_read_unlock();
@@ -175,34 +188,44 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ)
return buf;
}
-/*
+/**
* named_distribute - prepare name info for bulk distribution to another node
+ * @msg_list: list of messages (buffers) to be returned from this function
+ * @dnode: node to be updated
+ * @pls: linked list of publication items to be packed into buffer chain
*/
-static void named_distribute(struct list_head *message_list, u32 node,
- struct publ_list *pls, u32 max_item_buf)
+static void named_distribute(struct list_head *msg_list, u32 dnode,
+ struct publ_list *pls)
{
struct publication *publ;
struct sk_buff *buf = NULL;
struct distr_item *item = NULL;
- u32 left = 0;
- u32 rest = pls->size * ITEM_SIZE;
+ uint dsz = pls->size * ITEM_SIZE;
+ uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE;
+ uint rem = dsz;
+ uint msg_rem = 0;
list_for_each_entry(publ, &pls->list, local_list) {
+ /* Prepare next buffer: */
if (!buf) {
- left = (rest <= max_item_buf) ? rest : max_item_buf;
- rest -= left;
- buf = named_prepare_buf(PUBLICATION, left, node);
+ msg_rem = min_t(uint, rem, msg_dsz);
+ rem -= msg_rem;
+ buf = named_prepare_buf(PUBLICATION, msg_rem, dnode);
if (!buf) {
pr_warn("Bulk publication failure\n");
return;
}
item = (struct distr_item *)msg_data(buf_msg(buf));
}
+
+ /* Pack publication into message: */
publ_to_item(item, publ);
item++;
- left -= ITEM_SIZE;
- if (!left) {
- list_add_tail((struct list_head *)buf, message_list);
+ msg_rem -= ITEM_SIZE;
+
+ /* Append full buffer to list: */
+ if (!msg_rem) {
+ list_add_tail((struct list_head *)buf, msg_list);
buf = NULL;
}
}
@@ -211,16 +234,20 @@ static void named_distribute(struct list_head *message_list, u32 node,
/**
* tipc_named_node_up - tell specified node about all publications by this node
*/
-void tipc_named_node_up(u32 max_item_buf, u32 node)
+void tipc_named_node_up(u32 dnode)
{
- LIST_HEAD(message_list);
+ LIST_HEAD(msg_list);
+ struct sk_buff *buf_chain;
read_lock_bh(&tipc_nametbl_lock);
- named_distribute(&message_list, node, &publ_cluster, max_item_buf);
- named_distribute(&message_list, node, &publ_zone, max_item_buf);
+ named_distribute(&msg_list, dnode, &publ_cluster);
+ named_distribute(&msg_list, dnode, &publ_zone);
read_unlock_bh(&tipc_nametbl_lock);
- tipc_link_names_xmit(&message_list, node);
+ /* Convert circular list to linear list and send: */
+ buf_chain = (struct sk_buff *)msg_list.next;
+ ((struct sk_buff *)msg_list.prev)->next = NULL;
+ tipc_link_xmit(buf_chain, dnode, dnode);
}
/**
@@ -251,54 +278,105 @@ static void named_purge_publ(struct publication *publ)
}
/**
+ * tipc_update_nametbl - try to process a nametable update and notify
+ * subscribers
+ *
+ * tipc_nametbl_lock must be held.
+ * Returns the publication item if successful, otherwise NULL.
+ */
+static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype)
+{
+ struct publication *publ = NULL;
+
+ if (dtype == PUBLICATION) {
+ publ = tipc_nametbl_insert_publ(ntohl(i->type), ntohl(i->lower),
+ ntohl(i->upper),
+ TIPC_CLUSTER_SCOPE, node,
+ ntohl(i->ref), ntohl(i->key));
+ if (publ) {
+ tipc_nodesub_subscribe(&publ->subscr, node, publ,
+ (net_ev_handler)
+ named_purge_publ);
+ return true;
+ }
+ } else if (dtype == WITHDRAWAL) {
+ publ = tipc_nametbl_remove_publ(ntohl(i->type), ntohl(i->lower),
+ node, ntohl(i->ref),
+ ntohl(i->key));
+ if (publ) {
+ tipc_nodesub_unsubscribe(&publ->subscr);
+ kfree(publ);
+ return true;
+ }
+ } else {
+ pr_warn("Unrecognized name table message received\n");
+ }
+ return false;
+}
+
+/**
+ * tipc_named_add_backlog - add a failed name table update to the backlog
+ *
+ */
+static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node)
+{
+ struct distr_queue_item *e;
+ unsigned long now = get_jiffies_64();
+
+ e = kzalloc(sizeof(*e), GFP_ATOMIC);
+ if (!e)
+ return;
+ e->dtype = type;
+ e->node = node;
+ e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout);
+ memcpy(e, i, sizeof(*i));
+ list_add_tail(&e->next, &tipc_dist_queue);
+}
+
+/**
+ * tipc_named_process_backlog - try to process any pending name table updates
+ * from the network.
+ */
+void tipc_named_process_backlog(void)
+{
+ struct distr_queue_item *e, *tmp;
+ char addr[16];
+ unsigned long now = get_jiffies_64();
+
+ list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) {
+ if (time_after(e->expires, now)) {
+ if (!tipc_update_nametbl(&e->i, e->node, e->dtype))
+ continue;
+ } else {
+ tipc_addr_string_fill(addr, e->node);
+ pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n",
+ e->dtype, ntohl(e->i.type),
+ ntohl(e->i.lower),
+ ntohl(e->i.upper),
+ addr, ntohl(e->i.key));
+ }
+ list_del(&e->next);
+ kfree(e);
+ }
+}
+
+/**
* tipc_named_rcv - process name table update message sent by another node
*/
void tipc_named_rcv(struct sk_buff *buf)
{
- struct publication *publ;
struct tipc_msg *msg = buf_msg(buf);
struct distr_item *item = (struct distr_item *)msg_data(msg);
u32 count = msg_data_sz(msg) / ITEM_SIZE;
+ u32 node = msg_orignode(msg);
write_lock_bh(&tipc_nametbl_lock);
while (count--) {
- if (msg_type(msg) == PUBLICATION) {
- publ = tipc_nametbl_insert_publ(ntohl(item->type),
- ntohl(item->lower),
- ntohl(item->upper),
- TIPC_CLUSTER_SCOPE,
- msg_orignode(msg),
- ntohl(item->ref),
- ntohl(item->key));
- if (publ) {
- tipc_nodesub_subscribe(&publ->subscr,
- msg_orignode(msg),
- publ,
- (net_ev_handler)
- named_purge_publ);
- }
- } else if (msg_type(msg) == WITHDRAWAL) {
- publ = tipc_nametbl_remove_publ(ntohl(item->type),
- ntohl(item->lower),
- msg_orignode(msg),
- ntohl(item->ref),
- ntohl(item->key));
-
- if (publ) {
- tipc_nodesub_unsubscribe(&publ->subscr);
- kfree(publ);
- } else {
- pr_err("Unable to remove publication by node 0x%x\n"
- " (type=%u, lower=%u, ref=%u, key=%u)\n",
- msg_orignode(msg), ntohl(item->type),
- ntohl(item->lower), ntohl(item->ref),
- ntohl(item->key));
- }
- } else {
- pr_warn("Unrecognized name table message received\n");
- }
+ if (!tipc_update_nametbl(item, node, msg_type(msg)))
+ tipc_named_add_backlog(item, msg_type(msg), node);
item++;
}
+ tipc_named_process_backlog();
write_unlock_bh(&tipc_nametbl_lock);
kfree_skb(buf);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index b2eed4ec1526..b9e75feb3434 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -70,8 +70,9 @@ struct distr_item {
struct sk_buff *tipc_named_publish(struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct publication *publ);
void named_cluster_distribute(struct sk_buff *buf);
-void tipc_named_node_up(u32 max_item_buf, u32 node);
+void tipc_named_node_up(u32 dnode);
void tipc_named_rcv(struct sk_buff *buf);
void tipc_named_reinit(void);
+void tipc_named_process_backlog(void);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 9d7d37d95187..3a6a0a7c0759 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -39,7 +39,6 @@
#include "name_table.h"
#include "name_distr.h"
#include "subscr.h"
-#include "port.h"
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
@@ -262,8 +261,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
/* Lower end overlaps existing entry => need an exact match */
if ((sseq->lower != lower) || (sseq->upper != upper)) {
- pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
- type, lower, upper);
return NULL;
}
@@ -285,8 +282,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
/* Fail if upper end overlaps into an existing entry */
if ((inspos < nseq->first_free) &&
(upper >= nseq->sseqs[inspos].lower)) {
- pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
- type, lower, upper);
return NULL;
}
@@ -678,6 +673,8 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
if (likely(publ)) {
table.local_publ_count++;
buf = tipc_named_publish(publ);
+ /* Any pending external events? */
+ tipc_named_process_backlog();
}
write_unlock_bh(&tipc_nametbl_lock);
@@ -699,6 +696,8 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
if (likely(publ)) {
table.local_publ_count--;
buf = tipc_named_withdraw(publ);
+ /* Any pending external events? */
+ tipc_named_process_backlog();
write_unlock_bh(&tipc_nametbl_lock);
list_del_init(&publ->pport_list);
kfree(publ);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f64375e7f99f..93b9944a6a8b 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -1,7 +1,7 @@
/*
* net/tipc/net.c: TIPC network routing code
*
- * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 1995-2006, 2014, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -38,7 +38,6 @@
#include "net.h"
#include "name_distr.h"
#include "subscr.h"
-#include "port.h"
#include "socket.h"
#include "node.h"
#include "config.h"
@@ -104,67 +103,6 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-static void net_route_named_msg(struct sk_buff *buf)
-{
- struct tipc_msg *msg = buf_msg(buf);
- u32 dnode;
- u32 dport;
-
- if (!msg_named(msg)) {
- kfree_skb(buf);
- return;
- }
-
- dnode = addr_domain(msg_lookup_scope(msg));
- dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode);
- if (dport) {
- msg_set_destnode(msg, dnode);
- msg_set_destport(msg, dport);
- tipc_net_route_msg(buf);
- return;
- }
- tipc_reject_msg(buf, TIPC_ERR_NO_NAME);
-}
-
-void tipc_net_route_msg(struct sk_buff *buf)
-{
- struct tipc_msg *msg;
- u32 dnode;
-
- if (!buf)
- return;
- msg = buf_msg(buf);
-
- /* Handle message for this node */
- dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
- if (tipc_in_scope(dnode, tipc_own_addr)) {
- if (msg_isdata(msg)) {
- if (msg_mcast(msg))
- tipc_port_mcast_rcv(buf, NULL);
- else if (msg_destport(msg))
- tipc_sk_rcv(buf);
- else
- net_route_named_msg(buf);
- return;
- }
- switch (msg_user(msg)) {
- case NAME_DISTRIBUTOR:
- tipc_named_rcv(buf);
- break;
- case CONN_MANAGER:
- tipc_port_proto_rcv(buf);
- break;
- default:
- kfree_skb(buf);
- }
- return;
- }
-
- /* Handle message for another node */
- skb_trim(buf, msg_size(msg));
- tipc_link_xmit(buf, dnode, msg_link_selector(msg));
-}
-
int tipc_net_start(u32 addr)
{
char addr_string[16];
@@ -172,7 +110,7 @@ int tipc_net_start(u32 addr)
tipc_own_addr = addr;
tipc_named_reinit();
- tipc_port_reinit();
+ tipc_sk_reinit();
res = tipc_bclink_init();
if (res)
return res;
diff --git a/net/tipc/net.h b/net/tipc/net.h
index c6c2b46f7c28..59ef3388be2c 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -37,8 +37,6 @@
#ifndef _TIPC_NET_H
#define _TIPC_NET_H
-void tipc_net_route_msg(struct sk_buff *buf);
-
int tipc_net_start(u32 addr);
void tipc_net_stop(void);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 5b44c3041be4..90cee4a6fce4 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,7 +1,7 @@
/*
* net/tipc/node.c: TIPC node management routines
*
- * Copyright (c) 2000-2006, 2012 Ericsson AB
+ * Copyright (c) 2000-2006, 2012-2014, Ericsson AB
* Copyright (c) 2005-2006, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -38,6 +38,7 @@
#include "config.h"
#include "node.h"
#include "name_distr.h"
+#include "socket.h"
#define NODE_HTABLE_SIZE 512
@@ -50,6 +51,13 @@ static u32 tipc_num_nodes;
static u32 tipc_num_links;
static DEFINE_SPINLOCK(node_list_lock);
+struct tipc_sock_conn {
+ u32 port;
+ u32 peer_port;
+ u32 peer_node;
+ struct list_head list;
+};
+
/*
* A trivial power-of-two bitmask technique is used for speed, since this
* operation is done for every incoming TIPC packet. The number of hash table
@@ -100,6 +108,8 @@ struct tipc_node *tipc_node_create(u32 addr)
INIT_HLIST_NODE(&n_ptr->hash);
INIT_LIST_HEAD(&n_ptr->list);
INIT_LIST_HEAD(&n_ptr->nsub);
+ INIT_LIST_HEAD(&n_ptr->conn_sks);
+ __skb_queue_head_init(&n_ptr->waiting_sks);
hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
@@ -136,6 +146,71 @@ void tipc_node_stop(void)
spin_unlock_bh(&node_list_lock);
}
+int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port)
+{
+ struct tipc_node *node;
+ struct tipc_sock_conn *conn;
+
+ if (in_own_node(dnode))
+ return 0;
+
+ node = tipc_node_find(dnode);
+ if (!node) {
+ pr_warn("Connecting sock to node 0x%x failed\n", dnode);
+ return -EHOSTUNREACH;
+ }
+ conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
+ if (!conn)
+ return -EHOSTUNREACH;
+ conn->peer_node = dnode;
+ conn->port = port;
+ conn->peer_port = peer_port;
+
+ tipc_node_lock(node);
+ list_add_tail(&conn->list, &node->conn_sks);
+ tipc_node_unlock(node);
+ return 0;
+}
+
+void tipc_node_remove_conn(u32 dnode, u32 port)
+{
+ struct tipc_node *node;
+ struct tipc_sock_conn *conn, *safe;
+
+ if (in_own_node(dnode))
+ return;
+
+ node = tipc_node_find(dnode);
+ if (!node)
+ return;
+
+ tipc_node_lock(node);
+ list_for_each_entry_safe(conn, safe, &node->conn_sks, list) {
+ if (port != conn->port)
+ continue;
+ list_del(&conn->list);
+ kfree(conn);
+ }
+ tipc_node_unlock(node);
+}
+
+void tipc_node_abort_sock_conns(struct list_head *conns)
+{
+ struct tipc_sock_conn *conn, *safe;
+ struct sk_buff *buf;
+
+ list_for_each_entry_safe(conn, safe, conns, list) {
+ buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
+ SHORT_H_SIZE, 0, tipc_own_addr,
+ conn->peer_node, conn->port,
+ conn->peer_port, TIPC_ERR_NO_NODE);
+ if (likely(buf))
+ tipc_sk_rcv(buf);
+ list_del(&conn->list);
+ kfree(conn);
+ }
+}
+
/**
* tipc_node_link_up - handle addition of link
*
@@ -155,21 +230,25 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
if (!active[0]) {
active[0] = active[1] = l_ptr;
node_established_contact(n_ptr);
- return;
+ goto exit;
}
if (l_ptr->priority < active[0]->priority) {
pr_info("New link <%s> becomes standby\n", l_ptr->name);
- return;
+ goto exit;
}
tipc_link_dup_queue_xmit(active[0], l_ptr);
if (l_ptr->priority == active[0]->priority) {
active[0] = l_ptr;
- return;
+ goto exit;
}
pr_info("Old link <%s> becomes standby\n", active[0]->name);
if (active[1] != active[0])
pr_info("Old link <%s> becomes standby\n", active[1]->name);
active[0] = active[1] = l_ptr;
+exit:
+ /* Leave room for changeover header when returning 'mtu' to users: */
+ n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE;
+ n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE;
}
/**
@@ -229,6 +308,19 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
tipc_link_failover_send_queue(l_ptr);
else
node_lost_contact(n_ptr);
+
+ /* Leave room for changeover header when returning 'mtu' to users: */
+ if (active[0]) {
+ n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE;
+ n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE;
+ return;
+ }
+
+ /* Loopback link went down? No fragmentation needed from now on. */
+ if (n_ptr->addr == tipc_own_addr) {
+ n_ptr->act_mtus[0] = MAX_MSG_SIZE;
+ n_ptr->act_mtus[1] = MAX_MSG_SIZE;
+ }
}
int tipc_node_active_links(struct tipc_node *n_ptr)
@@ -457,32 +549,45 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len)
void tipc_node_unlock(struct tipc_node *node)
{
LIST_HEAD(nsub_list);
- struct tipc_link *link;
- int pkt_sz = 0;
+ LIST_HEAD(conn_sks);
+ struct sk_buff_head waiting_sks;
u32 addr = 0;
+ unsigned int flags = node->action_flags;
if (likely(!node->action_flags)) {
spin_unlock_bh(&node->lock);
return;
}
+ __skb_queue_head_init(&waiting_sks);
+ if (node->action_flags & TIPC_WAKEUP_USERS) {
+ skb_queue_splice_init(&node->waiting_sks, &waiting_sks);
+ node->action_flags &= ~TIPC_WAKEUP_USERS;
+ }
if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) {
list_replace_init(&node->nsub, &nsub_list);
+ list_replace_init(&node->conn_sks, &conn_sks);
node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN;
}
if (node->action_flags & TIPC_NOTIFY_NODE_UP) {
- link = node->active_links[0];
node->action_flags &= ~TIPC_NOTIFY_NODE_UP;
- if (link) {
- pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) *
- ITEM_SIZE;
- addr = node->addr;
- }
+ addr = node->addr;
}
+ node->action_flags &= ~TIPC_WAKEUP_BCAST_USERS;
spin_unlock_bh(&node->lock);
+ while (!skb_queue_empty(&waiting_sks))
+ tipc_sk_rcv(__skb_dequeue(&waiting_sks));
+
+ if (!list_empty(&conn_sks))
+ tipc_node_abort_sock_conns(&conn_sks);
+
if (!list_empty(&nsub_list))
tipc_nodesub_notify(&nsub_list);
- if (pkt_sz)
- tipc_named_node_up(pkt_sz, addr);
+
+ if (flags & TIPC_WAKEUP_BCAST_USERS)
+ tipc_bclink_wakeup_users();
+
+ if (addr)
+ tipc_named_node_up(addr);
}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 9087063793f2..67513c3c852c 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -41,6 +41,7 @@
#include "addr.h"
#include "net.h"
#include "bearer.h"
+#include "msg.h"
/*
* Out-of-range value for node signature
@@ -57,7 +58,9 @@ enum {
TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2),
TIPC_NOTIFY_NODE_DOWN = (1 << 3),
- TIPC_NOTIFY_NODE_UP = (1 << 4)
+ TIPC_NOTIFY_NODE_UP = (1 << 4),
+ TIPC_WAKEUP_USERS = (1 << 5),
+ TIPC_WAKEUP_BCAST_USERS = (1 << 6)
};
/**
@@ -105,6 +108,7 @@ struct tipc_node {
spinlock_t lock;
struct hlist_node hash;
struct tipc_link *active_links[2];
+ u32 act_mtus[2];
struct tipc_link *links[MAX_BEARERS];
unsigned int action_flags;
struct tipc_node_bclink bclink;
@@ -113,6 +117,8 @@ struct tipc_node {
int working_links;
u32 signature;
struct list_head nsub;
+ struct sk_buff_head waiting_sks;
+ struct list_head conn_sks;
struct rcu_head rcu;
};
@@ -131,6 +137,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len);
void tipc_node_unlock(struct tipc_node *node);
+int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port);
+void tipc_node_remove_conn(u32 dnode, u32 port);
static inline void tipc_node_lock(struct tipc_node *node)
{
@@ -143,4 +151,19 @@ static inline bool tipc_node_blocked(struct tipc_node *node)
TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
}
+static inline uint tipc_node_get_mtu(u32 addr, u32 selector)
+{
+ struct tipc_node *node;
+ u32 mtu;
+
+ node = tipc_node_find(addr);
+
+ if (likely(node))
+ mtu = node->act_mtus[selector & 1];
+ else
+ mtu = MAX_MSG_SIZE;
+
+ return mtu;
+}
+
#endif
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 7c59ab1d6ecb..2d13eea8574a 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -84,11 +84,13 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub)
void tipc_nodesub_notify(struct list_head *nsub_list)
{
struct tipc_node_subscr *ns, *safe;
+ net_ev_handler handle_node_down;
list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) {
- if (ns->handle_node_down) {
- ns->handle_node_down(ns->usr_handle);
+ handle_node_down = ns->handle_node_down;
+ if (handle_node_down) {
ns->handle_node_down = NULL;
+ handle_node_down(ns->usr_handle);
}
}
}
diff --git a/net/tipc/port.c b/net/tipc/port.c
deleted file mode 100644
index 5fd7acce01ea..000000000000
--- a/net/tipc/port.c
+++ /dev/null
@@ -1,898 +0,0 @@
-/*
- * net/tipc/port.c: TIPC port code
- *
- * Copyright (c) 1992-2007, 2014, Ericsson AB
- * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "config.h"
-#include "port.h"
-#include "name_table.h"
-#include "socket.h"
-
-/* Connection management: */
-#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */
-#define CONFIRMED 0
-#define PROBING 1
-
-#define MAX_REJECT_SIZE 1024
-
-DEFINE_SPINLOCK(tipc_port_list_lock);
-
-static LIST_HEAD(ports);
-static void port_handle_node_down(unsigned long ref);
-static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err);
-static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err);
-static void port_timeout(unsigned long ref);
-
-/**
- * tipc_port_peer_msg - verify message was sent by connected port's peer
- *
- * Handles cases where the node's network address has changed from
- * the default of <0.0.0> to its configured setting.
- */
-int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg)
-{
- u32 peernode;
- u32 orignode;
-
- if (msg_origport(msg) != tipc_port_peerport(p_ptr))
- return 0;
-
- orignode = msg_orignode(msg);
- peernode = tipc_port_peernode(p_ptr);
- return (orignode == peernode) ||
- (!orignode && (peernode == tipc_own_addr)) ||
- (!peernode && (orignode == tipc_own_addr));
-}
-
-/**
- * tipc_port_mcast_xmit - send a multicast message to local and remote
- * destinations
- */
-int tipc_port_mcast_xmit(struct tipc_port *oport,
- struct tipc_name_seq const *seq,
- struct iovec const *msg_sect,
- unsigned int len)
-{
- struct tipc_msg *hdr;
- struct sk_buff *buf;
- struct sk_buff *ibuf = NULL;
- struct tipc_port_list dports = {0, NULL, };
- int ext_targets;
- int res;
-
- /* Create multicast message */
- hdr = &oport->phdr;
- msg_set_type(hdr, TIPC_MCAST_MSG);
- msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
- msg_set_destport(hdr, 0);
- msg_set_destnode(hdr, 0);
- msg_set_nametype(hdr, seq->type);
- msg_set_namelower(hdr, seq->lower);
- msg_set_nameupper(hdr, seq->upper);
- msg_set_hdr_sz(hdr, MCAST_H_SIZE);
- res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf);
- if (unlikely(!buf))
- return res;
-
- /* Figure out where to send multicast message */
- ext_targets = tipc_nametbl_mc_translate(seq->type, seq->lower, seq->upper,
- TIPC_NODE_SCOPE, &dports);
-
- /* Send message to destinations (duplicate it only if necessary) */
- if (ext_targets) {
- if (dports.count != 0) {
- ibuf = skb_copy(buf, GFP_ATOMIC);
- if (ibuf == NULL) {
- tipc_port_list_free(&dports);
- kfree_skb(buf);
- return -ENOMEM;
- }
- }
- res = tipc_bclink_xmit(buf);
- if ((res < 0) && (dports.count != 0))
- kfree_skb(ibuf);
- } else {
- ibuf = buf;
- }
-
- if (res >= 0) {
- if (ibuf)
- tipc_port_mcast_rcv(ibuf, &dports);
- } else {
- tipc_port_list_free(&dports);
- }
- return res;
-}
-
-/**
- * tipc_port_mcast_rcv - deliver multicast message to all destination ports
- *
- * If there is no port list, perform a lookup to create one
- */
-void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp)
-{
- struct tipc_msg *msg;
- struct tipc_port_list dports = {0, NULL, };
- struct tipc_port_list *item = dp;
- int cnt = 0;
-
- msg = buf_msg(buf);
-
- /* Create destination port list, if one wasn't supplied */
- if (dp == NULL) {
- tipc_nametbl_mc_translate(msg_nametype(msg),
- msg_namelower(msg),
- msg_nameupper(msg),
- TIPC_CLUSTER_SCOPE,
- &dports);
- item = dp = &dports;
- }
-
- /* Deliver a copy of message to each destination port */
- if (dp->count != 0) {
- msg_set_destnode(msg, tipc_own_addr);
- if (dp->count == 1) {
- msg_set_destport(msg, dp->ports[0]);
- tipc_sk_rcv(buf);
- tipc_port_list_free(dp);
- return;
- }
- for (; cnt < dp->count; cnt++) {
- int index = cnt % PLSIZE;
- struct sk_buff *b = skb_clone(buf, GFP_ATOMIC);
-
- if (b == NULL) {
- pr_warn("Unable to deliver multicast message(s)\n");
- goto exit;
- }
- if ((index == 0) && (cnt != 0))
- item = item->next;
- msg_set_destport(buf_msg(b), item->ports[index]);
- tipc_sk_rcv(b);
- }
- }
-exit:
- kfree_skb(buf);
- tipc_port_list_free(dp);
-}
-
-
-void tipc_port_wakeup(struct tipc_port *port)
-{
- tipc_sock_wakeup(tipc_port_to_sock(port));
-}
-
-/* tipc_port_init - intiate TIPC port and lock it
- *
- * Returns obtained reference if initialization is successful, zero otherwise
- */
-u32 tipc_port_init(struct tipc_port *p_ptr,
- const unsigned int importance)
-{
- struct tipc_msg *msg;
- u32 ref;
-
- ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
- if (!ref) {
- pr_warn("Port registration failed, ref. table exhausted\n");
- return 0;
- }
-
- p_ptr->max_pkt = MAX_PKT_DEFAULT;
- p_ptr->ref = ref;
- INIT_LIST_HEAD(&p_ptr->wait_list);
- INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
- k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref);
- INIT_LIST_HEAD(&p_ptr->publications);
- INIT_LIST_HEAD(&p_ptr->port_list);
-
- /*
- * Must hold port list lock while initializing message header template
- * to ensure a change to node's own network address doesn't result
- * in template containing out-dated network address information
- */
- spin_lock_bh(&tipc_port_list_lock);
- msg = &p_ptr->phdr;
- tipc_msg_init(msg, importance, TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
- msg_set_origport(msg, ref);
- list_add_tail(&p_ptr->port_list, &ports);
- spin_unlock_bh(&tipc_port_list_lock);
- return ref;
-}
-
-void tipc_port_destroy(struct tipc_port *p_ptr)
-{
- struct sk_buff *buf = NULL;
-
- tipc_withdraw(p_ptr, 0, NULL);
-
- spin_lock_bh(p_ptr->lock);
- tipc_ref_discard(p_ptr->ref);
- spin_unlock_bh(p_ptr->lock);
-
- k_cancel_timer(&p_ptr->timer);
- if (p_ptr->connected) {
- buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
- tipc_nodesub_unsubscribe(&p_ptr->subscription);
- }
-
- spin_lock_bh(&tipc_port_list_lock);
- list_del(&p_ptr->port_list);
- list_del(&p_ptr->wait_list);
- spin_unlock_bh(&tipc_port_list_lock);
- k_term_timer(&p_ptr->timer);
- tipc_net_route_msg(buf);
-}
-
-/*
- * port_build_proto_msg(): create connection protocol message for port
- *
- * On entry the port must be locked and connected.
- */
-static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr,
- u32 type, u32 ack)
-{
- struct sk_buff *buf;
- struct tipc_msg *msg;
-
- buf = tipc_buf_acquire(INT_H_SIZE);
- if (buf) {
- msg = buf_msg(buf);
- tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE,
- tipc_port_peernode(p_ptr));
- msg_set_destport(msg, tipc_port_peerport(p_ptr));
- msg_set_origport(msg, p_ptr->ref);
- msg_set_msgcnt(msg, ack);
- }
- return buf;
-}
-
-int tipc_reject_msg(struct sk_buff *buf, u32 err)
-{
- struct tipc_msg *msg = buf_msg(buf);
- struct sk_buff *rbuf;
- struct tipc_msg *rmsg;
- int hdr_sz;
- u32 imp;
- u32 data_sz = msg_data_sz(msg);
- u32 src_node;
- u32 rmsg_sz;
-
- /* discard rejected message if it shouldn't be returned to sender */
- if (WARN(!msg_isdata(msg),
- "attempt to reject message with user=%u", msg_user(msg))) {
- dump_stack();
- goto exit;
- }
- if (msg_errcode(msg) || msg_dest_droppable(msg))
- goto exit;
-
- /*
- * construct returned message by copying rejected message header and
- * data (or subset), then updating header fields that need adjusting
- */
- hdr_sz = msg_hdr_sz(msg);
- rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE);
-
- rbuf = tipc_buf_acquire(rmsg_sz);
- if (rbuf == NULL)
- goto exit;
-
- rmsg = buf_msg(rbuf);
- skb_copy_to_linear_data(rbuf, msg, rmsg_sz);
-
- if (msg_connected(rmsg)) {
- imp = msg_importance(rmsg);
- if (imp < TIPC_CRITICAL_IMPORTANCE)
- msg_set_importance(rmsg, ++imp);
- }
- msg_set_non_seq(rmsg, 0);
- msg_set_size(rmsg, rmsg_sz);
- msg_set_errcode(rmsg, err);
- msg_set_prevnode(rmsg, tipc_own_addr);
- msg_swap_words(rmsg, 4, 5);
- if (!msg_short(rmsg))
- msg_swap_words(rmsg, 6, 7);
-
- /* send self-abort message when rejecting on a connected port */
- if (msg_connected(msg)) {
- struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
-
- if (p_ptr) {
- struct sk_buff *abuf = NULL;
-
- if (p_ptr->connected)
- abuf = port_build_self_abort_msg(p_ptr, err);
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(abuf);
- }
- }
-
- /* send returned message & dispose of rejected message */
- src_node = msg_prevnode(msg);
- if (in_own_node(src_node))
- tipc_sk_rcv(rbuf);
- else
- tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg));
-exit:
- kfree_skb(buf);
- return data_sz;
-}
-
-int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, unsigned int len,
- int err)
-{
- struct sk_buff *buf;
- int res;
-
- res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf);
- if (!buf)
- return res;
-
- return tipc_reject_msg(buf, err);
-}
-
-static void port_timeout(unsigned long ref)
-{
- struct tipc_port *p_ptr = tipc_port_lock(ref);
- struct sk_buff *buf = NULL;
-
- if (!p_ptr)
- return;
-
- if (!p_ptr->connected) {
- tipc_port_unlock(p_ptr);
- return;
- }
-
- /* Last probe answered ? */
- if (p_ptr->probing_state == PROBING) {
- buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
- } else {
- buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0);
- p_ptr->probing_state = PROBING;
- k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
- }
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
-}
-
-
-static void port_handle_node_down(unsigned long ref)
-{
- struct tipc_port *p_ptr = tipc_port_lock(ref);
- struct sk_buff *buf = NULL;
-
- if (!p_ptr)
- return;
- buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE);
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
-}
-
-
-static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err)
-{
- struct sk_buff *buf = port_build_peer_abort_msg(p_ptr, err);
-
- if (buf) {
- struct tipc_msg *msg = buf_msg(buf);
- msg_swap_words(msg, 4, 5);
- msg_swap_words(msg, 6, 7);
- }
- return buf;
-}
-
-
-static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err)
-{
- struct sk_buff *buf;
- struct tipc_msg *msg;
- u32 imp;
-
- if (!p_ptr->connected)
- return NULL;
-
- buf = tipc_buf_acquire(BASIC_H_SIZE);
- if (buf) {
- msg = buf_msg(buf);
- memcpy(msg, &p_ptr->phdr, BASIC_H_SIZE);
- msg_set_hdr_sz(msg, BASIC_H_SIZE);
- msg_set_size(msg, BASIC_H_SIZE);
- imp = msg_importance(msg);
- if (imp < TIPC_CRITICAL_IMPORTANCE)
- msg_set_importance(msg, ++imp);
- msg_set_errcode(msg, err);
- }
- return buf;
-}
-
-void tipc_port_proto_rcv(struct sk_buff *buf)
-{
- struct tipc_msg *msg = buf_msg(buf);
- struct tipc_port *p_ptr;
- struct sk_buff *r_buf = NULL;
- u32 destport = msg_destport(msg);
- int wakeable;
-
- /* Validate connection */
- p_ptr = tipc_port_lock(destport);
- if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) {
- r_buf = tipc_buf_acquire(BASIC_H_SIZE);
- if (r_buf) {
- msg = buf_msg(r_buf);
- tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG,
- BASIC_H_SIZE, msg_orignode(msg));
- msg_set_errcode(msg, TIPC_ERR_NO_PORT);
- msg_set_origport(msg, destport);
- msg_set_destport(msg, msg_origport(msg));
- }
- if (p_ptr)
- tipc_port_unlock(p_ptr);
- goto exit;
- }
-
- /* Process protocol message sent by peer */
- switch (msg_type(msg)) {
- case CONN_ACK:
- wakeable = tipc_port_congested(p_ptr) && p_ptr->congested;
- p_ptr->acked += msg_msgcnt(msg);
- if (!tipc_port_congested(p_ptr)) {
- p_ptr->congested = 0;
- if (wakeable)
- tipc_port_wakeup(p_ptr);
- }
- break;
- case CONN_PROBE:
- r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0);
- break;
- default:
- /* CONN_PROBE_REPLY or unrecognized - no action required */
- break;
- }
- p_ptr->probing_state = CONFIRMED;
- tipc_port_unlock(p_ptr);
-exit:
- tipc_net_route_msg(r_buf);
- kfree_skb(buf);
-}
-
-static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id)
-{
- struct publication *publ;
- int ret;
-
- if (full_id)
- ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:",
- tipc_zone(tipc_own_addr),
- tipc_cluster(tipc_own_addr),
- tipc_node(tipc_own_addr), p_ptr->ref);
- else
- ret = tipc_snprintf(buf, len, "%-10u:", p_ptr->ref);
-
- if (p_ptr->connected) {
- u32 dport = tipc_port_peerport(p_ptr);
- u32 destnode = tipc_port_peernode(p_ptr);
-
- ret += tipc_snprintf(buf + ret, len - ret,
- " connected to <%u.%u.%u:%u>",
- tipc_zone(destnode),
- tipc_cluster(destnode),
- tipc_node(destnode), dport);
- if (p_ptr->conn_type != 0)
- ret += tipc_snprintf(buf + ret, len - ret,
- " via {%u,%u}", p_ptr->conn_type,
- p_ptr->conn_instance);
- } else if (p_ptr->published) {
- ret += tipc_snprintf(buf + ret, len - ret, " bound to");
- list_for_each_entry(publ, &p_ptr->publications, pport_list) {
- if (publ->lower == publ->upper)
- ret += tipc_snprintf(buf + ret, len - ret,
- " {%u,%u}", publ->type,
- publ->lower);
- else
- ret += tipc_snprintf(buf + ret, len - ret,
- " {%u,%u,%u}", publ->type,
- publ->lower, publ->upper);
- }
- }
- ret += tipc_snprintf(buf + ret, len - ret, "\n");
- return ret;
-}
-
-struct sk_buff *tipc_port_get_ports(void)
-{
- struct sk_buff *buf;
- struct tlv_desc *rep_tlv;
- char *pb;
- int pb_len;
- struct tipc_port *p_ptr;
- int str_len = 0;
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
- if (!buf)
- return NULL;
- rep_tlv = (struct tlv_desc *)buf->data;
- pb = TLV_DATA(rep_tlv);
- pb_len = ULTRA_STRING_MAX_LEN;
-
- spin_lock_bh(&tipc_port_list_lock);
- list_for_each_entry(p_ptr, &ports, port_list) {
- spin_lock_bh(p_ptr->lock);
- str_len += port_print(p_ptr, pb, pb_len, 0);
- spin_unlock_bh(p_ptr->lock);
- }
- spin_unlock_bh(&tipc_port_list_lock);
- str_len += 1; /* for "\0" */
- skb_put(buf, TLV_SPACE(str_len));
- TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
-
- return buf;
-}
-
-void tipc_port_reinit(void)
-{
- struct tipc_port *p_ptr;
- struct tipc_msg *msg;
-
- spin_lock_bh(&tipc_port_list_lock);
- list_for_each_entry(p_ptr, &ports, port_list) {
- msg = &p_ptr->phdr;
- msg_set_prevnode(msg, tipc_own_addr);
- msg_set_orignode(msg, tipc_own_addr);
- }
- spin_unlock_bh(&tipc_port_list_lock);
-}
-
-void tipc_acknowledge(u32 ref, u32 ack)
-{
- struct tipc_port *p_ptr;
- struct sk_buff *buf = NULL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return;
- if (p_ptr->connected) {
- p_ptr->conn_unacked -= ack;
- buf = port_build_proto_msg(p_ptr, CONN_ACK, ack);
- }
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
-}
-
-int tipc_publish(struct tipc_port *p_ptr, unsigned int scope,
- struct tipc_name_seq const *seq)
-{
- struct publication *publ;
- u32 key;
-
- if (p_ptr->connected)
- return -EINVAL;
- key = p_ptr->ref + p_ptr->pub_count + 1;
- if (key == p_ptr->ref)
- return -EADDRINUSE;
-
- publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
- scope, p_ptr->ref, key);
- if (publ) {
- list_add(&publ->pport_list, &p_ptr->publications);
- p_ptr->pub_count++;
- p_ptr->published = 1;
- return 0;
- }
- return -EINVAL;
-}
-
-int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope,
- struct tipc_name_seq const *seq)
-{
- struct publication *publ;
- struct publication *tpubl;
- int res = -EINVAL;
-
- if (!seq) {
- list_for_each_entry_safe(publ, tpubl,
- &p_ptr->publications, pport_list) {
- tipc_nametbl_withdraw(publ->type, publ->lower,
- publ->ref, publ->key);
- }
- res = 0;
- } else {
- list_for_each_entry_safe(publ, tpubl,
- &p_ptr->publications, pport_list) {
- if (publ->scope != scope)
- continue;
- if (publ->type != seq->type)
- continue;
- if (publ->lower != seq->lower)
- continue;
- if (publ->upper != seq->upper)
- break;
- tipc_nametbl_withdraw(publ->type, publ->lower,
- publ->ref, publ->key);
- res = 0;
- break;
- }
- }
- if (list_empty(&p_ptr->publications))
- p_ptr->published = 0;
- return res;
-}
-
-int tipc_port_connect(u32 ref, struct tipc_portid const *peer)
-{
- struct tipc_port *p_ptr;
- int res;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- res = __tipc_port_connect(ref, p_ptr, peer);
- tipc_port_unlock(p_ptr);
- return res;
-}
-
-/*
- * __tipc_port_connect - connect to a remote peer
- *
- * Port must be locked.
- */
-int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr,
- struct tipc_portid const *peer)
-{
- struct tipc_msg *msg;
- int res = -EINVAL;
-
- if (p_ptr->published || p_ptr->connected)
- goto exit;
- if (!peer->ref)
- goto exit;
-
- msg = &p_ptr->phdr;
- msg_set_destnode(msg, peer->node);
- msg_set_destport(msg, peer->ref);
- msg_set_type(msg, TIPC_CONN_MSG);
- msg_set_lookup_scope(msg, 0);
- msg_set_hdr_sz(msg, SHORT_H_SIZE);
-
- p_ptr->probing_interval = PROBING_INTERVAL;
- p_ptr->probing_state = CONFIRMED;
- p_ptr->connected = 1;
- k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
-
- tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
- (void *)(unsigned long)ref,
- (net_ev_handler)port_handle_node_down);
- res = 0;
-exit:
- p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
- return res;
-}
-
-/*
- * __tipc_disconnect - disconnect port from peer
- *
- * Port must be locked.
- */
-int __tipc_port_disconnect(struct tipc_port *tp_ptr)
-{
- if (tp_ptr->connected) {
- tp_ptr->connected = 0;
- /* let timer expire on it's own to avoid deadlock! */
- tipc_nodesub_unsubscribe(&tp_ptr->subscription);
- return 0;
- }
-
- return -ENOTCONN;
-}
-
-/*
- * tipc_port_disconnect(): Disconnect port form peer.
- * This is a node local operation.
- */
-int tipc_port_disconnect(u32 ref)
-{
- struct tipc_port *p_ptr;
- int res;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- res = __tipc_port_disconnect(p_ptr);
- tipc_port_unlock(p_ptr);
- return res;
-}
-
-/*
- * tipc_port_shutdown(): Send a SHUTDOWN msg to peer and disconnect
- */
-int tipc_port_shutdown(u32 ref)
-{
- struct tipc_port *p_ptr;
- struct sk_buff *buf = NULL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
-
- buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN);
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
- return tipc_port_disconnect(ref);
-}
-
-/*
- * tipc_port_iovec_rcv: Concatenate and deliver sectioned
- * message for this node.
- */
-static int tipc_port_iovec_rcv(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len)
-{
- struct sk_buff *buf;
- int res;
-
- res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf);
- if (likely(buf))
- tipc_sk_rcv(buf);
- return res;
-}
-
-/**
- * tipc_send - send message sections on connection
- */
-int tipc_send(struct tipc_port *p_ptr,
- struct iovec const *msg_sect,
- unsigned int len)
-{
- u32 destnode;
- int res;
-
- if (!p_ptr->connected)
- return -EINVAL;
-
- p_ptr->congested = 1;
- if (!tipc_port_congested(p_ptr)) {
- destnode = tipc_port_peernode(p_ptr);
- if (likely(!in_own_node(destnode)))
- res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
- destnode);
- else
- res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
-
- if (likely(res != -ELINKCONG)) {
- p_ptr->congested = 0;
- if (res > 0)
- p_ptr->sent++;
- return res;
- }
- }
- if (tipc_port_unreliable(p_ptr)) {
- p_ptr->congested = 0;
- return len;
- }
- return -ELINKCONG;
-}
-
-/**
- * tipc_send2name - send message sections to port name
- */
-int tipc_send2name(struct tipc_port *p_ptr,
- struct tipc_name const *name,
- unsigned int domain,
- struct iovec const *msg_sect,
- unsigned int len)
-{
- struct tipc_msg *msg;
- u32 destnode = domain;
- u32 destport;
- int res;
-
- if (p_ptr->connected)
- return -EINVAL;
-
- msg = &p_ptr->phdr;
- msg_set_type(msg, TIPC_NAMED_MSG);
- msg_set_hdr_sz(msg, NAMED_H_SIZE);
- msg_set_nametype(msg, name->type);
- msg_set_nameinst(msg, name->instance);
- msg_set_lookup_scope(msg, tipc_addr_scope(domain));
- destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
- msg_set_destnode(msg, destnode);
- msg_set_destport(msg, destport);
-
- if (likely(destport || destnode)) {
- if (likely(in_own_node(destnode)))
- res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
- else if (tipc_own_addr)
- res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
- destnode);
- else
- res = tipc_port_iovec_reject(p_ptr, msg, msg_sect,
- len, TIPC_ERR_NO_NODE);
- if (likely(res != -ELINKCONG)) {
- if (res > 0)
- p_ptr->sent++;
- return res;
- }
- if (tipc_port_unreliable(p_ptr))
- return len;
-
- return -ELINKCONG;
- }
- return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len,
- TIPC_ERR_NO_NAME);
-}
-
-/**
- * tipc_send2port - send message sections to port identity
- */
-int tipc_send2port(struct tipc_port *p_ptr,
- struct tipc_portid const *dest,
- struct iovec const *msg_sect,
- unsigned int len)
-{
- struct tipc_msg *msg;
- int res;
-
- if (p_ptr->connected)
- return -EINVAL;
-
- msg = &p_ptr->phdr;
- msg_set_type(msg, TIPC_DIRECT_MSG);
- msg_set_lookup_scope(msg, 0);
- msg_set_destnode(msg, dest->node);
- msg_set_destport(msg, dest->ref);
- msg_set_hdr_sz(msg, BASIC_H_SIZE);
-
- if (in_own_node(dest->node))
- res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
- else if (tipc_own_addr)
- res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
- dest->node);
- else
- res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len,
- TIPC_ERR_NO_NODE);
- if (likely(res != -ELINKCONG)) {
- if (res > 0)
- p_ptr->sent++;
- return res;
- }
- if (tipc_port_unreliable(p_ptr))
- return len;
-
- return -ELINKCONG;
-}
diff --git a/net/tipc/port.h b/net/tipc/port.h
deleted file mode 100644
index cf4ca5b1d9a4..000000000000
--- a/net/tipc/port.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * net/tipc/port.h: Include file for TIPC port code
- *
- * Copyright (c) 1994-2007, 2014, Ericsson AB
- * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_PORT_H
-#define _TIPC_PORT_H
-
-#include "ref.h"
-#include "net.h"
-#include "msg.h"
-#include "node_subscr.h"
-
-#define TIPC_CONNACK_INTV 256
-#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2)
-#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
- SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
-
-/**
- * struct tipc_port - TIPC port structure
- * @lock: pointer to spinlock for controlling access to port
- * @connected: non-zero if port is currently connected to a peer port
- * @conn_type: TIPC type used when connection was established
- * @conn_instance: TIPC instance used when connection was established
- * @conn_unacked: number of unacknowledged messages received from peer port
- * @published: non-zero if port has one or more associated names
- * @congested: non-zero if cannot send because of link or port congestion
- * @max_pkt: maximum packet size "hint" used when building messages sent by port
- * @ref: unique reference to port in TIPC object registry
- * @phdr: preformatted message header used when sending messages
- * @port_list: adjacent ports in TIPC's global list of ports
- * @wait_list: adjacent ports in list of ports waiting on link congestion
- * @waiting_pkts:
- * @sent: # of non-empty messages sent by port
- * @acked: # of non-empty message acknowledgements from connected port's peer
- * @publications: list of publications for port
- * @pub_count: total # of publications port has made during its lifetime
- * @probing_state:
- * @probing_interval:
- * @timer_ref:
- * @subscription: "node down" subscription used to terminate failed connections
- */
-struct tipc_port {
- spinlock_t *lock;
- int connected;
- u32 conn_type;
- u32 conn_instance;
- u32 conn_unacked;
- int published;
- u32 congested;
- u32 max_pkt;
- u32 ref;
- struct tipc_msg phdr;
- struct list_head port_list;
- struct list_head wait_list;
- u32 waiting_pkts;
- u32 sent;
- u32 acked;
- struct list_head publications;
- u32 pub_count;
- u32 probing_state;
- u32 probing_interval;
- struct timer_list timer;
- struct tipc_node_subscr subscription;
-};
-
-extern spinlock_t tipc_port_list_lock;
-struct tipc_port_list;
-
-/*
- * TIPC port manipulation routines
- */
-u32 tipc_port_init(struct tipc_port *p_ptr,
- const unsigned int importance);
-
-int tipc_reject_msg(struct sk_buff *buf, u32 err);
-
-void tipc_acknowledge(u32 port_ref, u32 ack);
-
-void tipc_port_destroy(struct tipc_port *p_ptr);
-
-int tipc_publish(struct tipc_port *p_ptr, unsigned int scope,
- struct tipc_name_seq const *name_seq);
-
-int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope,
- struct tipc_name_seq const *name_seq);
-
-int tipc_port_connect(u32 portref, struct tipc_portid const *port);
-
-int tipc_port_disconnect(u32 portref);
-
-int tipc_port_shutdown(u32 ref);
-
-void tipc_port_wakeup(struct tipc_port *port);
-
-/*
- * The following routines require that the port be locked on entry
- */
-int __tipc_port_disconnect(struct tipc_port *tp_ptr);
-int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr,
- struct tipc_portid const *peer);
-int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg);
-
-/*
- * TIPC messaging routines
- */
-
-int tipc_send(struct tipc_port *port,
- struct iovec const *msg_sect,
- unsigned int len);
-
-int tipc_send2name(struct tipc_port *port,
- struct tipc_name const *name,
- u32 domain,
- struct iovec const *msg_sect,
- unsigned int len);
-
-int tipc_send2port(struct tipc_port *port,
- struct tipc_portid const *dest,
- struct iovec const *msg_sect,
- unsigned int len);
-
-int tipc_port_mcast_xmit(struct tipc_port *port,
- struct tipc_name_seq const *seq,
- struct iovec const *msg,
- unsigned int len);
-
-int tipc_port_iovec_reject(struct tipc_port *p_ptr,
- struct tipc_msg *hdr,
- struct iovec const *msg_sect,
- unsigned int len,
- int err);
-
-struct sk_buff *tipc_port_get_ports(void);
-void tipc_port_proto_rcv(struct sk_buff *buf);
-void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp);
-void tipc_port_reinit(void);
-
-/**
- * tipc_port_lock - lock port instance referred to and return its pointer
- */
-static inline struct tipc_port *tipc_port_lock(u32 ref)
-{
- return (struct tipc_port *)tipc_ref_lock(ref);
-}
-
-/**
- * tipc_port_unlock - unlock a port instance
- *
- * Can use pointer instead of tipc_ref_unlock() since port is already locked.
- */
-static inline void tipc_port_unlock(struct tipc_port *p_ptr)
-{
- spin_unlock_bh(p_ptr->lock);
-}
-
-static inline int tipc_port_congested(struct tipc_port *p_ptr)
-{
- return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN);
-}
-
-
-static inline u32 tipc_port_peernode(struct tipc_port *p_ptr)
-{
- return msg_destnode(&p_ptr->phdr);
-}
-
-static inline u32 tipc_port_peerport(struct tipc_port *p_ptr)
-{
- return msg_destport(&p_ptr->phdr);
-}
-
-static inline bool tipc_port_unreliable(struct tipc_port *port)
-{
- return msg_src_droppable(&port->phdr) != 0;
-}
-
-static inline void tipc_port_set_unreliable(struct tipc_port *port,
- bool unreliable)
-{
- msg_set_src_droppable(&port->phdr, unreliable ? 1 : 0);
-}
-
-static inline bool tipc_port_unreturnable(struct tipc_port *port)
-{
- return msg_dest_droppable(&port->phdr) != 0;
-}
-
-static inline void tipc_port_set_unreturnable(struct tipc_port *port,
- bool unreturnable)
-{
- msg_set_dest_droppable(&port->phdr, unreturnable ? 1 : 0);
-}
-
-
-static inline int tipc_port_importance(struct tipc_port *port)
-{
- return msg_importance(&port->phdr);
-}
-
-static inline void tipc_port_set_importance(struct tipc_port *port, int imp)
-{
- msg_set_importance(&port->phdr, (u32)imp);
-}
-
-#endif
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
deleted file mode 100644
index 3d4ecd754eee..000000000000
--- a/net/tipc/ref.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * net/tipc/ref.c: TIPC object registry code
- *
- * Copyright (c) 1991-2006, Ericsson AB
- * Copyright (c) 2004-2007, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "ref.h"
-
-/**
- * struct reference - TIPC object reference entry
- * @object: pointer to object associated with reference entry
- * @lock: spinlock controlling access to object
- * @ref: reference value for object (combines instance & array index info)
- */
-struct reference {
- void *object;
- spinlock_t lock;
- u32 ref;
-};
-
-/**
- * struct tipc_ref_table - table of TIPC object reference entries
- * @entries: pointer to array of reference entries
- * @capacity: array index of first unusable entry
- * @init_point: array index of first uninitialized entry
- * @first_free: array index of first unused object reference entry
- * @last_free: array index of last unused object reference entry
- * @index_mask: bitmask for array index portion of reference values
- * @start_mask: initial value for instance value portion of reference values
- */
-struct ref_table {
- struct reference *entries;
- u32 capacity;
- u32 init_point;
- u32 first_free;
- u32 last_free;
- u32 index_mask;
- u32 start_mask;
-};
-
-/*
- * Object reference table consists of 2**N entries.
- *
- * State Object ptr Reference
- * ----- ---------- ---------
- * In use non-NULL XXXX|own index
- * (XXXX changes each time entry is acquired)
- * Free NULL YYYY|next free index
- * (YYYY is one more than last used XXXX)
- * Uninitialized NULL 0
- *
- * Entry 0 is not used; this allows index 0 to denote the end of the free list.
- *
- * Note that a reference value of 0 does not necessarily indicate that an
- * entry is uninitialized, since the last entry in the free list could also
- * have a reference value of 0 (although this is unlikely).
- */
-
-static struct ref_table tipc_ref_table;
-
-static DEFINE_SPINLOCK(ref_table_lock);
-
-/**
- * tipc_ref_table_init - create reference table for objects
- */
-int tipc_ref_table_init(u32 requested_size, u32 start)
-{
- struct reference *table;
- u32 actual_size;
-
- /* account for unused entry, then round up size to a power of 2 */
-
- requested_size++;
- for (actual_size = 16; actual_size < requested_size; actual_size <<= 1)
- /* do nothing */ ;
-
- /* allocate table & mark all entries as uninitialized */
- table = vzalloc(actual_size * sizeof(struct reference));
- if (table == NULL)
- return -ENOMEM;
-
- tipc_ref_table.entries = table;
- tipc_ref_table.capacity = requested_size;
- tipc_ref_table.init_point = 1;
- tipc_ref_table.first_free = 0;
- tipc_ref_table.last_free = 0;
- tipc_ref_table.index_mask = actual_size - 1;
- tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask;
-
- return 0;
-}
-
-/**
- * tipc_ref_table_stop - destroy reference table for objects
- */
-void tipc_ref_table_stop(void)
-{
- vfree(tipc_ref_table.entries);
- tipc_ref_table.entries = NULL;
-}
-
-/**
- * tipc_ref_acquire - create reference to an object
- *
- * Register an object pointer in reference table and lock the object.
- * Returns a unique reference value that is used from then on to retrieve the
- * object pointer, or to determine that the object has been deregistered.
- *
- * Note: The object is returned in the locked state so that the caller can
- * register a partially initialized object, without running the risk that
- * the object will be accessed before initialization is complete.
- */
-u32 tipc_ref_acquire(void *object, spinlock_t **lock)
-{
- u32 index;
- u32 index_mask;
- u32 next_plus_upper;
- u32 ref;
- struct reference *entry = NULL;
-
- if (!object) {
- pr_err("Attempt to acquire ref. to non-existent obj\n");
- return 0;
- }
- if (!tipc_ref_table.entries) {
- pr_err("Ref. table not found in acquisition attempt\n");
- return 0;
- }
-
- /* take a free entry, if available; otherwise initialize a new entry */
- spin_lock_bh(&ref_table_lock);
- if (tipc_ref_table.first_free) {
- index = tipc_ref_table.first_free;
- entry = &(tipc_ref_table.entries[index]);
- index_mask = tipc_ref_table.index_mask;
- next_plus_upper = entry->ref;
- tipc_ref_table.first_free = next_plus_upper & index_mask;
- ref = (next_plus_upper & ~index_mask) + index;
- } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
- index = tipc_ref_table.init_point++;
- entry = &(tipc_ref_table.entries[index]);
- spin_lock_init(&entry->lock);
- ref = tipc_ref_table.start_mask + index;
- } else {
- ref = 0;
- }
- spin_unlock_bh(&ref_table_lock);
-
- /*
- * Grab the lock so no one else can modify this entry
- * While we assign its ref value & object pointer
- */
- if (entry) {
- spin_lock_bh(&entry->lock);
- entry->ref = ref;
- entry->object = object;
- *lock = &entry->lock;
- /*
- * keep it locked, the caller is responsible
- * for unlocking this when they're done with it
- */
- }
-
- return ref;
-}
-
-/**
- * tipc_ref_discard - invalidate references to an object
- *
- * Disallow future references to an object and free up the entry for re-use.
- * Note: The entry's spin_lock may still be busy after discard
- */
-void tipc_ref_discard(u32 ref)
-{
- struct reference *entry;
- u32 index;
- u32 index_mask;
-
- if (!tipc_ref_table.entries) {
- pr_err("Ref. table not found during discard attempt\n");
- return;
- }
-
- index_mask = tipc_ref_table.index_mask;
- index = ref & index_mask;
- entry = &(tipc_ref_table.entries[index]);
-
- spin_lock_bh(&ref_table_lock);
-
- if (!entry->object) {
- pr_err("Attempt to discard ref. to non-existent obj\n");
- goto exit;
- }
- if (entry->ref != ref) {
- pr_err("Attempt to discard non-existent reference\n");
- goto exit;
- }
-
- /*
- * mark entry as unused; increment instance part of entry's reference
- * to invalidate any subsequent references
- */
- entry->object = NULL;
- entry->ref = (ref & ~index_mask) + (index_mask + 1);
-
- /* append entry to free entry list */
- if (tipc_ref_table.first_free == 0)
- tipc_ref_table.first_free = index;
- else
- tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index;
- tipc_ref_table.last_free = index;
-
-exit:
- spin_unlock_bh(&ref_table_lock);
-}
-
-/**
- * tipc_ref_lock - lock referenced object and return pointer to it
- */
-void *tipc_ref_lock(u32 ref)
-{
- if (likely(tipc_ref_table.entries)) {
- struct reference *entry;
-
- entry = &tipc_ref_table.entries[ref &
- tipc_ref_table.index_mask];
- if (likely(entry->ref != 0)) {
- spin_lock_bh(&entry->lock);
- if (likely((entry->ref == ref) && (entry->object)))
- return entry->object;
- spin_unlock_bh(&entry->lock);
- }
- }
- return NULL;
-}
diff --git a/net/tipc/ref.h b/net/tipc/ref.h
deleted file mode 100644
index d01aa1df63b8..000000000000
--- a/net/tipc/ref.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * net/tipc/ref.h: Include file for TIPC object registry code
- *
- * Copyright (c) 1991-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_REF_H
-#define _TIPC_REF_H
-
-int tipc_ref_table_init(u32 requested_size, u32 start);
-void tipc_ref_table_stop(void);
-
-u32 tipc_ref_acquire(void *object, spinlock_t **lock);
-void tipc_ref_discard(u32 ref);
-
-void *tipc_ref_lock(u32 ref);
-
-#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ef0475568f9e..75275c5cf929 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -35,21 +35,84 @@
*/
#include "core.h"
-#include "port.h"
+#include "name_table.h"
#include "node.h"
-
+#include "link.h"
#include <linux/export.h>
+#include "config.h"
+#include "socket.h"
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */
-#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */
+#define TIPC_FWD_MSG 1
+#define TIPC_CONN_OK 0
+#define TIPC_CONN_PROBING 1
+
+/**
+ * struct tipc_sock - TIPC socket structure
+ * @sk: socket - interacts with 'port' and with user via the socket API
+ * @connected: non-zero if port is currently connected to a peer port
+ * @conn_type: TIPC type used when connection was established
+ * @conn_instance: TIPC instance used when connection was established
+ * @published: non-zero if port has one or more associated names
+ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @ref: unique reference to port in TIPC object registry
+ * @phdr: preformatted message header used when sending messages
+ * @port_list: adjacent ports in TIPC's global list of ports
+ * @publications: list of publications for port
+ * @pub_count: total # of publications port has made during its lifetime
+ * @probing_state:
+ * @probing_interval:
+ * @timer:
+ * @port: port - interacts with 'sk' and with the rest of the TIPC stack
+ * @peer_name: the peer of the connection, if any
+ * @conn_timeout: the time we can wait for an unresponded setup request
+ * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
+ * @link_cong: non-zero if owner must sleep because of link congestion
+ * @sent_unacked: # messages sent by socket, and not yet acked by peer
+ * @rcv_unacked: # messages read by user, but not yet acked back to peer
+ */
+struct tipc_sock {
+ struct sock sk;
+ int connected;
+ u32 conn_type;
+ u32 conn_instance;
+ int published;
+ u32 max_pkt;
+ u32 ref;
+ struct tipc_msg phdr;
+ struct list_head sock_list;
+ struct list_head publications;
+ u32 pub_count;
+ u32 probing_state;
+ u32 probing_interval;
+ struct timer_list timer;
+ uint conn_timeout;
+ atomic_t dupl_rcvcnt;
+ bool link_cong;
+ uint sent_unacked;
+ uint rcv_unacked;
+};
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
+static void tipc_sk_timeout(unsigned long ref);
+static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq);
+static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq);
+static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk);
+static void tipc_sk_ref_discard(u32 ref);
+static struct tipc_sock *tipc_sk_get(u32 ref);
+static struct tipc_sock *tipc_sk_get_next(u32 *ref);
+static void tipc_sk_put(struct tipc_sock *tsk);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -103,29 +166,115 @@ static struct proto tipc_proto_kern;
* - port reference
*/
-#include "socket.h"
+static u32 tsk_peer_node(struct tipc_sock *tsk)
+{
+ return msg_destnode(&tsk->phdr);
+}
+
+static u32 tsk_peer_port(struct tipc_sock *tsk)
+{
+ return msg_destport(&tsk->phdr);
+}
+
+static bool tsk_unreliable(struct tipc_sock *tsk)
+{
+ return msg_src_droppable(&tsk->phdr) != 0;
+}
+
+static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
+{
+ msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
+}
+
+static bool tsk_unreturnable(struct tipc_sock *tsk)
+{
+ return msg_dest_droppable(&tsk->phdr) != 0;
+}
+
+static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
+{
+ msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
+}
+
+static int tsk_importance(struct tipc_sock *tsk)
+{
+ return msg_importance(&tsk->phdr);
+}
+
+static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+{
+ if (imp > TIPC_CRITICAL_IMPORTANCE)
+ return -EINVAL;
+ msg_set_importance(&tsk->phdr, (u32)imp);
+ return 0;
+}
+
+static struct tipc_sock *tipc_sk(const struct sock *sk)
+{
+ return container_of(sk, struct tipc_sock, sk);
+}
+
+static int tsk_conn_cong(struct tipc_sock *tsk)
+{
+ return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN;
+}
/**
- * advance_rx_queue - discard first buffer in socket receive queue
+ * tsk_advance_rx_queue - discard first buffer in socket receive queue
*
* Caller must hold socket lock
*/
-static void advance_rx_queue(struct sock *sk)
+static void tsk_advance_rx_queue(struct sock *sk)
{
kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}
/**
- * reject_rx_queue - reject all buffers in socket receive queue
+ * tsk_rej_rx_queue - reject all buffers in socket receive queue
*
* Caller must hold socket lock
*/
-static void reject_rx_queue(struct sock *sk)
+static void tsk_rej_rx_queue(struct sock *sk)
{
struct sk_buff *buf;
+ u32 dnode;
+
+ while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
+ if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT))
+ tipc_link_xmit(buf, dnode, 0);
+ }
+}
+
+/* tsk_peer_msg - verify if message was sent by connected port's peer
+ *
+ * Handles cases where the node's network address has changed from
+ * the default of <0.0.0> to its configured setting.
+ */
+static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
+{
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 orig_node;
+ u32 peer_node;
+
+ if (unlikely(!tsk->connected))
+ return false;
- while ((buf = __skb_dequeue(&sk->sk_receive_queue)))
- tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
+ if (unlikely(msg_origport(msg) != peer_port))
+ return false;
+
+ orig_node = msg_orignode(msg);
+ peer_node = tsk_peer_node(tsk);
+
+ if (likely(orig_node == peer_node))
+ return true;
+
+ if (!orig_node && (peer_node == tipc_own_addr))
+ return true;
+
+ if (!peer_node && (orig_node == tipc_own_addr))
+ return true;
+
+ return false;
}
/**
@@ -147,7 +296,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
socket_state state;
struct sock *sk;
struct tipc_sock *tsk;
- struct tipc_port *port;
+ struct tipc_msg *msg;
u32 ref;
/* Validate arguments */
@@ -182,32 +331,36 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
return -ENOMEM;
tsk = tipc_sk(sk);
- port = &tsk->port;
-
- ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE);
+ ref = tipc_sk_ref_acquire(tsk);
if (!ref) {
- pr_warn("Socket registration failed, ref. table exhausted\n");
- sk_free(sk);
+ pr_warn("Socket create failed; reference table exhausted\n");
return -ENOMEM;
}
+ tsk->max_pkt = MAX_PKT_DEFAULT;
+ tsk->ref = ref;
+ INIT_LIST_HEAD(&tsk->publications);
+ msg = &tsk->phdr;
+ tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
+ NAMED_H_SIZE, 0);
+ msg_set_origport(msg, ref);
/* Finish initializing socket data structures */
sock->ops = ops;
sock->state = state;
-
sock_init_data(sock, sk);
+ k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref);
sk->sk_backlog_rcv = tipc_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
+ tsk->sent_unacked = 0;
atomic_set(&tsk->dupl_rcvcnt, 0);
- tipc_port_unlock(port);
if (sock->state == SS_READY) {
- tipc_port_set_unreturnable(port, true);
+ tsk_set_unreturnable(tsk, true);
if (sock->type == SOCK_DGRAM)
- tipc_port_set_unreliable(port, true);
+ tsk_set_unreliable(tsk, true);
}
return 0;
}
@@ -301,8 +454,8 @@ static int tipc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk;
- struct tipc_port *port;
struct sk_buff *buf;
+ u32 dnode;
/*
* Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -312,13 +465,13 @@ static int tipc_release(struct socket *sock)
return 0;
tsk = tipc_sk(sk);
- port = &tsk->port;
lock_sock(sk);
/*
* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer)
*/
+ dnode = tsk_peer_node(tsk);
while (sock->state != SS_DISCONNECTING) {
buf = __skb_dequeue(&sk->sk_receive_queue);
if (buf == NULL)
@@ -329,16 +482,27 @@ static int tipc_release(struct socket *sock)
if ((sock->state == SS_CONNECTING) ||
(sock->state == SS_CONNECTED)) {
sock->state = SS_DISCONNECTING;
- tipc_port_disconnect(port->ref);
+ tsk->connected = 0;
+ tipc_node_remove_conn(dnode, tsk->ref);
}
- tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
+ if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT))
+ tipc_link_xmit(buf, dnode, 0);
}
}
- /* Destroy TIPC port; also disconnects an active connection and
- * sends a 'FIN-' to peer.
- */
- tipc_port_destroy(port);
+ tipc_sk_withdraw(tsk, 0, NULL);
+ tipc_sk_ref_discard(tsk->ref);
+ k_cancel_timer(&tsk->timer);
+ if (tsk->connected) {
+ buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
+ SHORT_H_SIZE, 0, dnode, tipc_own_addr,
+ tsk_peer_port(tsk),
+ tsk->ref, TIPC_ERR_NO_PORT);
+ if (buf)
+ tipc_link_xmit(buf, dnode, tsk->ref);
+ tipc_node_remove_conn(dnode, tsk->ref);
+ }
+ k_term_timer(&tsk->timer);
/* Discard any remaining (connection-based) messages in receive queue */
__skb_queue_purge(&sk->sk_receive_queue);
@@ -346,7 +510,6 @@ static int tipc_release(struct socket *sock)
/* Reject any messages that accumulated in backlog queue */
sock->state = SS_DISCONNECTING;
release_sock(sk);
-
sock_put(sk);
sock->sk = NULL;
@@ -378,7 +541,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
lock_sock(sk);
if (unlikely(!uaddr_len)) {
- res = tipc_withdraw(&tsk->port, 0, NULL);
+ res = tipc_sk_withdraw(tsk, 0, NULL);
goto exit;
}
@@ -406,8 +569,8 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
}
res = (addr->scope > 0) ?
- tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) :
- tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq);
+ tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
+ tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
release_sock(sk);
return res;
@@ -437,10 +600,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
if ((sock->state != SS_CONNECTED) &&
((peer != 2) || (sock->state != SS_DISCONNECTING)))
return -ENOTCONN;
- addr->addr.id.ref = tipc_port_peerport(&tsk->port);
- addr->addr.id.node = tipc_port_peernode(&tsk->port);
+ addr->addr.id.ref = tsk_peer_port(tsk);
+ addr->addr.id.node = tsk_peer_node(tsk);
} else {
- addr->addr.id.ref = tsk->port.ref;
+ addr->addr.id.ref = tsk->ref;
addr->addr.id.node = tipc_own_addr;
}
@@ -504,12 +667,12 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
switch ((int)sock->state) {
case SS_UNCONNECTED:
- if (!tsk->port.congested)
+ if (!tsk->link_cong)
mask |= POLLOUT;
break;
case SS_READY:
case SS_CONNECTED:
- if (!tsk->port.congested)
+ if (!tsk->link_cong && !tsk_conn_cong(tsk))
mask |= POLLOUT;
/* fall thru' */
case SS_CONNECTING:
@@ -526,6 +689,136 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
}
/**
+ * tipc_sendmcast - send multicast message
+ * @sock: socket structure
+ * @seq: destination address
+ * @iov: message data to send
+ * @dsz: total length of message data
+ * @timeo: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
+ struct iovec *iov, size_t dsz, long timeo)
+{
+ struct sock *sk = sock->sk;
+ struct tipc_msg *mhdr = &tipc_sk(sk)->phdr;
+ struct sk_buff *buf;
+ uint mtu;
+ int rc;
+
+ msg_set_type(mhdr, TIPC_MCAST_MSG);
+ msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
+ msg_set_destport(mhdr, 0);
+ msg_set_destnode(mhdr, 0);
+ msg_set_nametype(mhdr, seq->type);
+ msg_set_namelower(mhdr, seq->lower);
+ msg_set_nameupper(mhdr, seq->upper);
+ msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
+
+new_mtu:
+ mtu = tipc_bclink_get_mtu();
+ rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf);
+ if (unlikely(rc < 0))
+ return rc;
+
+ do {
+ rc = tipc_bclink_xmit(buf);
+ if (likely(rc >= 0)) {
+ rc = dsz;
+ break;
+ }
+ if (rc == -EMSGSIZE)
+ goto new_mtu;
+ if (rc != -ELINKCONG)
+ break;
+ tipc_sk(sk)->link_cong = 1;
+ rc = tipc_wait_for_sndmsg(sock, &timeo);
+ if (rc)
+ kfree_skb_list(buf);
+ } while (!rc);
+ return rc;
+}
+
+/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets
+ */
+void tipc_sk_mcast_rcv(struct sk_buff *buf)
+{
+ struct tipc_msg *msg = buf_msg(buf);
+ struct tipc_port_list dports = {0, NULL, };
+ struct tipc_port_list *item;
+ struct sk_buff *b;
+ uint i, last, dst = 0;
+ u32 scope = TIPC_CLUSTER_SCOPE;
+
+ if (in_own_node(msg_orignode(msg)))
+ scope = TIPC_NODE_SCOPE;
+
+ /* Create destination port list: */
+ tipc_nametbl_mc_translate(msg_nametype(msg),
+ msg_namelower(msg),
+ msg_nameupper(msg),
+ scope,
+ &dports);
+ last = dports.count;
+ if (!last) {
+ kfree_skb(buf);
+ return;
+ }
+
+ for (item = &dports; item; item = item->next) {
+ for (i = 0; i < PLSIZE && ++dst <= last; i++) {
+ b = (dst != last) ? skb_clone(buf, GFP_ATOMIC) : buf;
+ if (!b) {
+ pr_warn("Failed do clone mcast rcv buffer\n");
+ continue;
+ }
+ msg_set_destport(msg, item->ports[i]);
+ tipc_sk_rcv(b);
+ }
+ }
+ tipc_port_list_free(&dports);
+}
+
+/**
+ * tipc_sk_proto_rcv - receive a connection mng protocol message
+ * @tsk: receiving socket
+ * @dnode: node to send response message to, if any
+ * @buf: buffer containing protocol message
+ * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if
+ * (CONN_PROBE_REPLY) message should be forwarded.
+ */
+static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode,
+ struct sk_buff *buf)
+{
+ struct tipc_msg *msg = buf_msg(buf);
+ int conn_cong;
+
+ /* Ignore if connection cannot be validated: */
+ if (!tsk_peer_msg(tsk, msg))
+ goto exit;
+
+ tsk->probing_state = TIPC_CONN_OK;
+
+ if (msg_type(msg) == CONN_ACK) {
+ conn_cong = tsk_conn_cong(tsk);
+ tsk->sent_unacked -= msg_msgcnt(msg);
+ if (conn_cong)
+ tsk->sk.sk_write_space(&tsk->sk);
+ } else if (msg_type(msg) == CONN_PROBE) {
+ if (!tipc_msg_reverse(buf, dnode, TIPC_OK))
+ return TIPC_OK;
+ msg_set_type(msg, CONN_PROBE_REPLY);
+ return TIPC_FWD_MSG;
+ }
+ /* Do nothing if msg_type() == CONN_PROBE_REPLY */
+exit:
+ kfree_skb(buf);
+ return TIPC_OK;
+}
+
+/**
* dest_name_check - verify user is permitted to send to specified port name
* @dest: destination address
* @m: descriptor for message to be sent
@@ -539,6 +832,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
{
struct tipc_cfg_msg_hdr hdr;
+ if (unlikely(dest->addrtype == TIPC_ADDR_ID))
+ return 0;
if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
return 0;
if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
@@ -575,19 +870,18 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
return sock_intr_errno(*timeo_p);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- done = sk_wait_event(sk, timeo_p, !tsk->port.congested);
+ done = sk_wait_event(sk, timeo_p, !tsk->link_cong);
finish_wait(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
-
/**
* tipc_sendmsg - send message in connectionless manner
* @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
- * @total_len: length of message
+ * @dsz: amount of user data to be sent
*
* Message must have an destination specified explicitly.
* Used for SOCK_RDM and SOCK_DGRAM messages,
@@ -597,100 +891,122 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
* Returns the number of bytes sent on success, or errno otherwise
*/
static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+ struct msghdr *m, size_t dsz)
{
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
- DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
- int needs_conn;
+ struct tipc_msg *mhdr = &tsk->phdr;
+ struct iovec *iov = m->msg_iov;
+ u32 dnode, dport;
+ struct sk_buff *buf;
+ struct tipc_name_seq *seq = &dest->addr.nameseq;
+ u32 mtu;
long timeo;
- int res = -EINVAL;
+ int rc = -EINVAL;
if (unlikely(!dest))
return -EDESTADDRREQ;
+
if (unlikely((m->msg_namelen < sizeof(*dest)) ||
(dest->family != AF_TIPC)))
return -EINVAL;
- if (total_len > TIPC_MAX_USER_MSG_SIZE)
+
+ if (dsz > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
if (iocb)
lock_sock(sk);
- needs_conn = (sock->state != SS_READY);
- if (unlikely(needs_conn)) {
+ if (unlikely(sock->state != SS_READY)) {
if (sock->state == SS_LISTENING) {
- res = -EPIPE;
+ rc = -EPIPE;
goto exit;
}
if (sock->state != SS_UNCONNECTED) {
- res = -EISCONN;
+ rc = -EISCONN;
goto exit;
}
- if (tsk->port.published) {
- res = -EOPNOTSUPP;
+ if (tsk->published) {
+ rc = -EOPNOTSUPP;
goto exit;
}
if (dest->addrtype == TIPC_ADDR_NAME) {
- tsk->port.conn_type = dest->addr.name.name.type;
- tsk->port.conn_instance = dest->addr.name.name.instance;
+ tsk->conn_type = dest->addr.name.name.type;
+ tsk->conn_instance = dest->addr.name.name.instance;
}
-
- /* Abort any pending connection attempts (very unlikely) */
- reject_rx_queue(sk);
}
+ rc = dest_name_check(dest, m);
+ if (rc)
+ goto exit;
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
- do {
- if (dest->addrtype == TIPC_ADDR_NAME) {
- res = dest_name_check(dest, m);
- if (res)
- break;
- res = tipc_send2name(port,
- &dest->addr.name.name,
- dest->addr.name.domain,
- m->msg_iov,
- total_len);
- } else if (dest->addrtype == TIPC_ADDR_ID) {
- res = tipc_send2port(port,
- &dest->addr.id,
- m->msg_iov,
- total_len);
- } else if (dest->addrtype == TIPC_ADDR_MCAST) {
- if (needs_conn) {
- res = -EOPNOTSUPP;
- break;
- }
- res = dest_name_check(dest, m);
- if (res)
- break;
- res = tipc_port_mcast_xmit(port,
- &dest->addr.nameseq,
- m->msg_iov,
- total_len);
+
+ if (dest->addrtype == TIPC_ADDR_MCAST) {
+ rc = tipc_sendmcast(sock, seq, iov, dsz, timeo);
+ goto exit;
+ } else if (dest->addrtype == TIPC_ADDR_NAME) {
+ u32 type = dest->addr.name.name.type;
+ u32 inst = dest->addr.name.name.instance;
+ u32 domain = dest->addr.name.domain;
+
+ dnode = domain;
+ msg_set_type(mhdr, TIPC_NAMED_MSG);
+ msg_set_hdr_sz(mhdr, NAMED_H_SIZE);
+ msg_set_nametype(mhdr, type);
+ msg_set_nameinst(mhdr, inst);
+ msg_set_lookup_scope(mhdr, tipc_addr_scope(domain));
+ dport = tipc_nametbl_translate(type, inst, &dnode);
+ msg_set_destnode(mhdr, dnode);
+ msg_set_destport(mhdr, dport);
+ if (unlikely(!dport && !dnode)) {
+ rc = -EHOSTUNREACH;
+ goto exit;
}
- if (likely(res != -ELINKCONG)) {
- if (needs_conn && (res >= 0))
+ } else if (dest->addrtype == TIPC_ADDR_ID) {
+ dnode = dest->addr.id.node;
+ msg_set_type(mhdr, TIPC_DIRECT_MSG);
+ msg_set_lookup_scope(mhdr, 0);
+ msg_set_destnode(mhdr, dnode);
+ msg_set_destport(mhdr, dest->addr.id.ref);
+ msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
+ }
+
+new_mtu:
+ mtu = tipc_node_get_mtu(dnode, tsk->ref);
+ rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf);
+ if (rc < 0)
+ goto exit;
+
+ do {
+ TIPC_SKB_CB(buf)->wakeup_pending = tsk->link_cong;
+ rc = tipc_link_xmit(buf, dnode, tsk->ref);
+ if (likely(rc >= 0)) {
+ if (sock->state != SS_READY)
sock->state = SS_CONNECTING;
+ rc = dsz;
break;
}
- res = tipc_wait_for_sndmsg(sock, &timeo);
- if (res)
+ if (rc == -EMSGSIZE)
+ goto new_mtu;
+ if (rc != -ELINKCONG)
break;
- } while (1);
-
+ tsk->link_cong = 1;
+ rc = tipc_wait_for_sndmsg(sock, &timeo);
+ if (rc)
+ kfree_skb_list(buf);
+ } while (!rc);
exit:
if (iocb)
release_sock(sk);
- return res;
+
+ return rc;
}
static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
DEFINE_WAIT(wait);
int done;
@@ -709,37 +1025,48 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
done = sk_wait_event(sk, timeo_p,
- (!port->congested || !port->connected));
+ (!tsk->link_cong &&
+ !tsk_conn_cong(tsk)) ||
+ !tsk->connected);
finish_wait(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
/**
- * tipc_send_packet - send a connection-oriented message
- * @iocb: if NULL, indicates that socket lock is already held
+ * tipc_send_stream - send stream-oriented data
+ * @iocb: (unused)
* @sock: socket structure
- * @m: message to send
- * @total_len: length of message
+ * @m: data to send
+ * @dsz: total length of data to be transmitted
*
- * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
+ * Used for SOCK_STREAM data.
*
- * Returns the number of bytes sent on success, or errno otherwise
+ * Returns the number of bytes sent on success (or partial success),
+ * or errno if no data sent
*/
-static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t dsz)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *mhdr = &tsk->phdr;
+ struct sk_buff *buf;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
- int res = -EINVAL;
+ u32 ref = tsk->ref;
+ int rc = -EINVAL;
long timeo;
+ u32 dnode;
+ uint mtu, send, sent = 0;
/* Handle implied connection establishment */
- if (unlikely(dest))
- return tipc_sendmsg(iocb, sock, m, total_len);
-
- if (total_len > TIPC_MAX_USER_MSG_SIZE)
+ if (unlikely(dest)) {
+ rc = tipc_sendmsg(iocb, sock, m, dsz);
+ if (dsz && (dsz == rc))
+ tsk->sent_unacked = 1;
+ return rc;
+ }
+ if (dsz > (uint)INT_MAX)
return -EMSGSIZE;
if (iocb)
@@ -747,148 +1074,88 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
if (unlikely(sock->state != SS_CONNECTED)) {
if (sock->state == SS_DISCONNECTING)
- res = -EPIPE;
+ rc = -EPIPE;
else
- res = -ENOTCONN;
+ rc = -ENOTCONN;
goto exit;
}
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ dnode = tsk_peer_node(tsk);
+
+next:
+ mtu = tsk->max_pkt;
+ send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
+ rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf);
+ if (unlikely(rc < 0))
+ goto exit;
do {
- res = tipc_send(&tsk->port, m->msg_iov, total_len);
- if (likely(res != -ELINKCONG))
- break;
- res = tipc_wait_for_sndpkt(sock, &timeo);
- if (res)
- break;
- } while (1);
+ if (likely(!tsk_conn_cong(tsk))) {
+ rc = tipc_link_xmit(buf, dnode, ref);
+ if (likely(!rc)) {
+ tsk->sent_unacked++;
+ sent += send;
+ if (sent == dsz)
+ break;
+ goto next;
+ }
+ if (rc == -EMSGSIZE) {
+ tsk->max_pkt = tipc_node_get_mtu(dnode, ref);
+ goto next;
+ }
+ if (rc != -ELINKCONG)
+ break;
+ tsk->link_cong = 1;
+ }
+ rc = tipc_wait_for_sndpkt(sock, &timeo);
+ if (rc)
+ kfree_skb_list(buf);
+ } while (!rc);
exit:
if (iocb)
release_sock(sk);
- return res;
+ return sent ? sent : rc;
}
/**
- * tipc_send_stream - send stream-oriented data
- * @iocb: (unused)
+ * tipc_send_packet - send a connection-oriented message
+ * @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
- * @m: data to send
- * @total_len: total length of data to be sent
+ * @m: message to send
+ * @dsz: length of data to be transmitted
*
- * Used for SOCK_STREAM data.
+ * Used for SOCK_SEQPACKET messages.
*
- * Returns the number of bytes sent on success (or partial success),
- * or errno if no data sent
+ * Returns the number of bytes sent on success, or errno otherwise
*/
-static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t dsz)
{
- struct sock *sk = sock->sk;
- struct tipc_sock *tsk = tipc_sk(sk);
- struct msghdr my_msg;
- struct iovec my_iov;
- struct iovec *curr_iov;
- int curr_iovlen;
- char __user *curr_start;
- u32 hdr_size;
- int curr_left;
- int bytes_to_send;
- int bytes_sent;
- int res;
-
- lock_sock(sk);
-
- /* Handle special cases where there is no connection */
- if (unlikely(sock->state != SS_CONNECTED)) {
- if (sock->state == SS_UNCONNECTED)
- res = tipc_send_packet(NULL, sock, m, total_len);
- else
- res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN;
- goto exit;
- }
-
- if (unlikely(m->msg_name)) {
- res = -EISCONN;
- goto exit;
- }
-
- if (total_len > (unsigned int)INT_MAX) {
- res = -EMSGSIZE;
- goto exit;
- }
-
- /*
- * Send each iovec entry using one or more messages
- *
- * Note: This algorithm is good for the most likely case
- * (i.e. one large iovec entry), but could be improved to pass sets
- * of small iovec entries into send_packet().
- */
- curr_iov = m->msg_iov;
- curr_iovlen = m->msg_iovlen;
- my_msg.msg_iov = &my_iov;
- my_msg.msg_iovlen = 1;
- my_msg.msg_flags = m->msg_flags;
- my_msg.msg_name = NULL;
- bytes_sent = 0;
-
- hdr_size = msg_hdr_sz(&tsk->port.phdr);
-
- while (curr_iovlen--) {
- curr_start = curr_iov->iov_base;
- curr_left = curr_iov->iov_len;
-
- while (curr_left) {
- bytes_to_send = tsk->port.max_pkt - hdr_size;
- if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
- bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
- if (curr_left < bytes_to_send)
- bytes_to_send = curr_left;
- my_iov.iov_base = curr_start;
- my_iov.iov_len = bytes_to_send;
- res = tipc_send_packet(NULL, sock, &my_msg,
- bytes_to_send);
- if (res < 0) {
- if (bytes_sent)
- res = bytes_sent;
- goto exit;
- }
- curr_left -= bytes_to_send;
- curr_start += bytes_to_send;
- bytes_sent += bytes_to_send;
- }
+ if (dsz > TIPC_MAX_USER_MSG_SIZE)
+ return -EMSGSIZE;
- curr_iov++;
- }
- res = bytes_sent;
-exit:
- release_sock(sk);
- return res;
+ return tipc_send_stream(iocb, sock, m, dsz);
}
-/**
- * auto_connect - complete connection setup to a remote port
- * @tsk: tipc socket structure
- * @msg: peer's response message
- *
- * Returns 0 on success, errno otherwise
+/* tipc_sk_finish_conn - complete the setup of a connection
*/
-static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg)
+static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
+ u32 peer_node)
{
- struct tipc_port *port = &tsk->port;
- struct socket *sock = tsk->sk.sk_socket;
- struct tipc_portid peer;
-
- peer.ref = msg_origport(msg);
- peer.node = msg_orignode(msg);
-
- __tipc_port_connect(port->ref, port, &peer);
-
- if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&port->phdr, (u32)msg_importance(msg));
- sock->state = SS_CONNECTED;
- return 0;
+ struct tipc_msg *msg = &tsk->phdr;
+
+ msg_set_destnode(msg, peer_node);
+ msg_set_destport(msg, peer_port);
+ msg_set_type(msg, TIPC_CONN_MSG);
+ msg_set_lookup_scope(msg, 0);
+ msg_set_hdr_sz(msg, SHORT_H_SIZE);
+
+ tsk->probing_interval = CONN_PROBING_INTERVAL;
+ tsk->probing_state = TIPC_CONN_OK;
+ tsk->connected = 1;
+ k_start_timer(&tsk->timer, tsk->probing_interval);
+ tipc_node_add_conn(peer_node, tsk->ref, peer_port);
+ tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref);
}
/**
@@ -915,17 +1182,17 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
}
/**
- * anc_data_recv - optionally capture ancillary data for received message
+ * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
* @m: descriptor for message info
* @msg: received message header
- * @tport: TIPC port associated with message
+ * @tsk: TIPC port associated with message
*
* Note: Ancillary data is not captured if not requested by receiver.
*
* Returns 0 if successful, otherwise errno
*/
-static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
- struct tipc_port *tport)
+static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
+ struct tipc_sock *tsk)
{
u32 anc_data[3];
u32 err;
@@ -968,10 +1235,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
anc_data[2] = msg_nameupper(msg);
break;
case TIPC_CONN_MSG:
- has_name = (tport->conn_type != 0);
- anc_data[0] = tport->conn_type;
- anc_data[1] = tport->conn_instance;
- anc_data[2] = tport->conn_instance;
+ has_name = (tsk->conn_type != 0);
+ anc_data[0] = tsk->conn_type;
+ anc_data[1] = tsk->conn_instance;
+ anc_data[2] = tsk->conn_instance;
break;
default:
has_name = 0;
@@ -985,6 +1252,24 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
return 0;
}
+static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
+{
+ struct sk_buff *buf = NULL;
+ struct tipc_msg *msg;
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 dnode = tsk_peer_node(tsk);
+
+ if (!tsk->connected)
+ return;
+ buf = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode,
+ tipc_own_addr, peer_port, tsk->ref, TIPC_OK);
+ if (!buf)
+ return;
+ msg = buf_msg(buf);
+ msg_set_msgcnt(msg, ack);
+ tipc_link_xmit(buf, dnode, msg_link_selector(msg));
+}
+
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
struct sock *sk = sock->sk;
@@ -1035,7 +1320,6 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1070,7 +1354,7 @@ restart:
/* Discard an empty non-errored message & try again */
if ((!sz) && (!err)) {
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
goto restart;
}
@@ -1078,7 +1362,7 @@ restart:
set_orig_addr(m, msg);
/* Capture ancillary data (optional) */
- res = anc_data_recv(m, msg, port);
+ res = tipc_sk_anc_data_recv(m, msg, tsk);
if (res)
goto exit;
@@ -1104,9 +1388,11 @@ restart:
/* Consume received message (optional) */
if (likely(!(flags & MSG_PEEK))) {
if ((sock->state != SS_READY) &&
- (++port->conn_unacked >= TIPC_CONNACK_INTV))
- tipc_acknowledge(port->ref, port->conn_unacked);
- advance_rx_queue(sk);
+ (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
+ tipc_sk_send_ack(tsk, tsk->rcv_unacked);
+ tsk->rcv_unacked = 0;
+ }
+ tsk_advance_rx_queue(sk);
}
exit:
release_sock(sk);
@@ -1130,7 +1416,6 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1168,14 +1453,14 @@ restart:
/* Discard an empty non-errored message & try again */
if ((!sz) && (!err)) {
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
goto restart;
}
/* Optionally capture sender's address & ancillary data of first msg */
if (sz_copied == 0) {
set_orig_addr(m, msg);
- res = anc_data_recv(m, msg, port);
+ res = tipc_sk_anc_data_recv(m, msg, tsk);
if (res)
goto exit;
}
@@ -1213,9 +1498,11 @@ restart:
/* Consume received message (optional) */
if (likely(!(flags & MSG_PEEK))) {
- if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV))
- tipc_acknowledge(port->ref, port->conn_unacked);
- advance_rx_queue(sk);
+ if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
+ tipc_sk_send_ack(tsk, tsk->rcv_unacked);
+ tsk->rcv_unacked = 0;
+ }
+ tsk_advance_rx_queue(sk);
}
/* Loop around if more data is required */
@@ -1269,18 +1556,14 @@ static void tipc_data_ready(struct sock *sk)
* @tsk: TIPC socket
* @msg: message
*
- * Returns TIPC error status code and socket error status code
- * once it encounters some errors
+ * Returns 0 (TIPC_OK) if everyting ok, -TIPC_ERR_NO_PORT otherwise
*/
-static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
+static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
{
struct sock *sk = &tsk->sk;
- struct tipc_port *port = &tsk->port;
struct socket *sock = sk->sk_socket;
struct tipc_msg *msg = buf_msg(*buf);
-
- u32 retval = TIPC_ERR_NO_PORT;
- int res;
+ int retval = -TIPC_ERR_NO_PORT;
if (msg_mcast(msg))
return retval;
@@ -1288,16 +1571,23 @@ static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
switch ((int)sock->state) {
case SS_CONNECTED:
/* Accept only connection-based messages sent by peer */
- if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) {
+ if (tsk_peer_msg(tsk, msg)) {
if (unlikely(msg_errcode(msg))) {
sock->state = SS_DISCONNECTING;
- __tipc_port_disconnect(port);
+ tsk->connected = 0;
+ /* let timer expire on it's own */
+ tipc_node_remove_conn(tsk_peer_node(tsk),
+ tsk->ref);
}
retval = TIPC_OK;
}
break;
case SS_CONNECTING:
/* Accept only ACK or NACK message */
+
+ if (unlikely(!msg_connected(msg)))
+ break;
+
if (unlikely(msg_errcode(msg))) {
sock->state = SS_DISCONNECTING;
sk->sk_err = ECONNREFUSED;
@@ -1305,17 +1595,17 @@ static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
break;
}
- if (unlikely(!msg_connected(msg)))
- break;
-
- res = auto_connect(tsk, msg);
- if (res) {
+ if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) {
sock->state = SS_DISCONNECTING;
- sk->sk_err = -res;
+ sk->sk_err = EINVAL;
retval = TIPC_OK;
break;
}
+ tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg));
+ msg_set_importance(&tsk->phdr, msg_importance(msg));
+ sock->state = SS_CONNECTED;
+
/* If an incoming message is an 'ACK-', it should be
* discarded here because it doesn't contain useful
* data. In addition, we should try to wake up
@@ -1382,32 +1672,44 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
*
* Called with socket lock already taken; port lock may also be taken.
*
- * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
+ * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message
+ * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded
*/
-static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
+static int filter_rcv(struct sock *sk, struct sk_buff *buf)
{
struct socket *sock = sk->sk_socket;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *msg = buf_msg(buf);
unsigned int limit = rcvbuf_limit(sk, buf);
- u32 res = TIPC_OK;
+ u32 onode;
+ int rc = TIPC_OK;
+
+ if (unlikely(msg_user(msg) == CONN_MANAGER))
+ return tipc_sk_proto_rcv(tsk, &onode, buf);
+
+ if (unlikely(msg_user(msg) == SOCK_WAKEUP)) {
+ kfree_skb(buf);
+ tsk->link_cong = 0;
+ sk->sk_write_space(sk);
+ return TIPC_OK;
+ }
/* Reject message if it is wrong sort of message for socket */
if (msg_type(msg) > TIPC_DIRECT_MSG)
- return TIPC_ERR_NO_PORT;
+ return -TIPC_ERR_NO_PORT;
if (sock->state == SS_READY) {
if (msg_connected(msg))
- return TIPC_ERR_NO_PORT;
+ return -TIPC_ERR_NO_PORT;
} else {
- res = filter_connect(tsk, &buf);
- if (res != TIPC_OK || buf == NULL)
- return res;
+ rc = filter_connect(tsk, &buf);
+ if (rc != TIPC_OK || buf == NULL)
+ return rc;
}
/* Reject message if there isn't room to queue it */
if (sk_rmem_alloc_get(sk) + buf->truesize >= limit)
- return TIPC_ERR_OVERLOAD;
+ return -TIPC_ERR_OVERLOAD;
/* Enqueue message */
TIPC_SKB_CB(buf)->handle = NULL;
@@ -1429,16 +1731,23 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
*/
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
{
- u32 res;
+ int rc;
+ u32 onode;
struct tipc_sock *tsk = tipc_sk(sk);
uint truesize = buf->truesize;
- res = filter_rcv(sk, buf);
- if (unlikely(res))
- tipc_reject_msg(buf, res);
+ rc = filter_rcv(sk, buf);
+
+ if (likely(!rc)) {
+ if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT)
+ atomic_add(truesize, &tsk->dupl_rcvcnt);
+ return 0;
+ }
+
+ if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc))
+ return 0;
- if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT)
- atomic_add(truesize, &tsk->dupl_rcvcnt);
+ tipc_link_xmit(buf, onode, 0);
return 0;
}
@@ -1452,49 +1761,42 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
int tipc_sk_rcv(struct sk_buff *buf)
{
struct tipc_sock *tsk;
- struct tipc_port *port;
struct sock *sk;
u32 dport = msg_destport(buf_msg(buf));
- int err = TIPC_OK;
+ int rc = TIPC_OK;
uint limit;
+ u32 dnode;
- /* Forward unresolved named message */
- if (unlikely(!dport)) {
- tipc_net_route_msg(buf);
- return 0;
- }
-
- /* Validate destination */
- port = tipc_port_lock(dport);
- if (unlikely(!port)) {
- err = TIPC_ERR_NO_PORT;
+ /* Validate destination and message */
+ tsk = tipc_sk_get(dport);
+ if (unlikely(!tsk)) {
+ rc = tipc_msg_eval(buf, &dnode);
goto exit;
}
-
- tsk = tipc_port_to_sock(port);
sk = &tsk->sk;
/* Queue message */
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- err = filter_rcv(sk, buf);
+ rc = filter_rcv(sk, buf);
} else {
if (sk->sk_backlog.len == 0)
atomic_set(&tsk->dupl_rcvcnt, 0);
limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt);
if (sk_add_backlog(sk, buf, limit))
- err = TIPC_ERR_OVERLOAD;
+ rc = -TIPC_ERR_OVERLOAD;
}
-
bh_unlock_sock(sk);
- tipc_port_unlock(port);
-
- if (likely(!err))
+ tipc_sk_put(tsk);
+ if (likely(!rc))
return 0;
exit:
- tipc_reject_msg(buf, err);
- return -EHOSTUNREACH;
+ if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc))
+ return -EHOSTUNREACH;
+
+ tipc_link_xmit(buf, dnode, 0);
+ return (rc < 0) ? -EHOSTUNREACH : 0;
}
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
@@ -1673,10 +1975,8 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
struct sock *new_sk, *sk = sock->sk;
struct sk_buff *buf;
- struct tipc_port *new_port;
+ struct tipc_sock *new_tsock;
struct tipc_msg *msg;
- struct tipc_portid peer;
- u32 new_ref;
long timeo;
int res;
@@ -1698,8 +1998,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
goto exit;
new_sk = new_sock->sk;
- new_port = &tipc_sk(new_sk)->port;
- new_ref = new_port->ref;
+ new_tsock = tipc_sk(new_sk);
msg = buf_msg(buf);
/* we lock on new_sk; but lockdep sees the lock on sk */
@@ -1709,18 +2008,16 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
* Reject any stray messages received by new socket
* before the socket lock was taken (very, very unlikely)
*/
- reject_rx_queue(new_sk);
+ tsk_rej_rx_queue(new_sk);
/* Connect new socket to it's peer */
- peer.ref = msg_origport(msg);
- peer.node = msg_orignode(msg);
- tipc_port_connect(new_ref, &peer);
+ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
new_sock->state = SS_CONNECTED;
- tipc_port_set_importance(new_port, msg_importance(msg));
+ tsk_set_importance(new_tsock, msg_importance(msg));
if (msg_named(msg)) {
- new_port->conn_type = msg_nametype(msg);
- new_port->conn_instance = msg_nameinst(msg);
+ new_tsock->conn_type = msg_nametype(msg);
+ new_tsock->conn_instance = msg_nameinst(msg);
}
/*
@@ -1730,7 +2027,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
if (!msg_data_sz(msg)) {
struct msghdr m = {NULL,};
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
tipc_send_packet(NULL, new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
@@ -1756,8 +2053,8 @@ static int tipc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
+ u32 dnode;
int res;
if (how != SHUT_RDWR)
@@ -1777,14 +2074,21 @@ restart:
kfree_skb(buf);
goto restart;
}
- tipc_port_disconnect(port->ref);
- tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
+ if (tipc_msg_reverse(buf, &dnode, TIPC_CONN_SHUTDOWN))
+ tipc_link_xmit(buf, dnode, tsk->ref);
+ tipc_node_remove_conn(dnode, tsk->ref);
} else {
- tipc_port_shutdown(port->ref);
+ dnode = tsk_peer_node(tsk);
+ buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
+ TIPC_CONN_MSG, SHORT_H_SIZE,
+ 0, dnode, tipc_own_addr,
+ tsk_peer_port(tsk),
+ tsk->ref, TIPC_CONN_SHUTDOWN);
+ tipc_link_xmit(buf, dnode, tsk->ref);
}
-
+ tsk->connected = 0;
sock->state = SS_DISCONNECTING;
-
+ tipc_node_remove_conn(dnode, tsk->ref);
/* fall through */
case SS_DISCONNECTING:
@@ -1805,6 +2109,432 @@ restart:
return res;
}
+static void tipc_sk_timeout(unsigned long ref)
+{
+ struct tipc_sock *tsk;
+ struct sock *sk;
+ struct sk_buff *buf = NULL;
+ u32 peer_port, peer_node;
+
+ tsk = tipc_sk_get(ref);
+ if (!tsk)
+ return;
+
+ sk = &tsk->sk;
+ bh_lock_sock(sk);
+ if (!tsk->connected) {
+ bh_unlock_sock(sk);
+ goto exit;
+ }
+ peer_port = tsk_peer_port(tsk);
+ peer_node = tsk_peer_node(tsk);
+
+ if (tsk->probing_state == TIPC_CONN_PROBING) {
+ /* Previous probe not answered -> self abort */
+ buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
+ SHORT_H_SIZE, 0, tipc_own_addr,
+ peer_node, ref, peer_port,
+ TIPC_ERR_NO_PORT);
+ } else {
+ buf = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE,
+ 0, peer_node, tipc_own_addr,
+ peer_port, ref, TIPC_OK);
+ tsk->probing_state = TIPC_CONN_PROBING;
+ k_start_timer(&tsk->timer, tsk->probing_interval);
+ }
+ bh_unlock_sock(sk);
+ if (buf)
+ tipc_link_xmit(buf, peer_node, ref);
+exit:
+ tipc_sk_put(tsk);
+}
+
+static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq)
+{
+ struct publication *publ;
+ u32 key;
+
+ if (tsk->connected)
+ return -EINVAL;
+ key = tsk->ref + tsk->pub_count + 1;
+ if (key == tsk->ref)
+ return -EADDRINUSE;
+
+ publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
+ scope, tsk->ref, key);
+ if (unlikely(!publ))
+ return -EINVAL;
+
+ list_add(&publ->pport_list, &tsk->publications);
+ tsk->pub_count++;
+ tsk->published = 1;
+ return 0;
+}
+
+static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq)
+{
+ struct publication *publ;
+ struct publication *safe;
+ int rc = -EINVAL;
+
+ list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+ if (seq) {
+ if (publ->scope != scope)
+ continue;
+ if (publ->type != seq->type)
+ continue;
+ if (publ->lower != seq->lower)
+ continue;
+ if (publ->upper != seq->upper)
+ break;
+ tipc_nametbl_withdraw(publ->type, publ->lower,
+ publ->ref, publ->key);
+ rc = 0;
+ break;
+ }
+ tipc_nametbl_withdraw(publ->type, publ->lower,
+ publ->ref, publ->key);
+ rc = 0;
+ }
+ if (list_empty(&tsk->publications))
+ tsk->published = 0;
+ return rc;
+}
+
+static int tipc_sk_show(struct tipc_sock *tsk, char *buf,
+ int len, int full_id)
+{
+ struct publication *publ;
+ int ret;
+
+ if (full_id)
+ ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:",
+ tipc_zone(tipc_own_addr),
+ tipc_cluster(tipc_own_addr),
+ tipc_node(tipc_own_addr), tsk->ref);
+ else
+ ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref);
+
+ if (tsk->connected) {
+ u32 dport = tsk_peer_port(tsk);
+ u32 destnode = tsk_peer_node(tsk);
+
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " connected to <%u.%u.%u:%u>",
+ tipc_zone(destnode),
+ tipc_cluster(destnode),
+ tipc_node(destnode), dport);
+ if (tsk->conn_type != 0)
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " via {%u,%u}", tsk->conn_type,
+ tsk->conn_instance);
+ } else if (tsk->published) {
+ ret += tipc_snprintf(buf + ret, len - ret, " bound to");
+ list_for_each_entry(publ, &tsk->publications, pport_list) {
+ if (publ->lower == publ->upper)
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " {%u,%u}", publ->type,
+ publ->lower);
+ else
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " {%u,%u,%u}", publ->type,
+ publ->lower, publ->upper);
+ }
+ }
+ ret += tipc_snprintf(buf + ret, len - ret, "\n");
+ return ret;
+}
+
+struct sk_buff *tipc_sk_socks_show(void)
+{
+ struct sk_buff *buf;
+ struct tlv_desc *rep_tlv;
+ char *pb;
+ int pb_len;
+ struct tipc_sock *tsk;
+ int str_len = 0;
+ u32 ref = 0;
+
+ buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
+ if (!buf)
+ return NULL;
+ rep_tlv = (struct tlv_desc *)buf->data;
+ pb = TLV_DATA(rep_tlv);
+ pb_len = ULTRA_STRING_MAX_LEN;
+
+ tsk = tipc_sk_get_next(&ref);
+ for (; tsk; tsk = tipc_sk_get_next(&ref)) {
+ lock_sock(&tsk->sk);
+ str_len += tipc_sk_show(tsk, pb + str_len,
+ pb_len - str_len, 0);
+ release_sock(&tsk->sk);
+ tipc_sk_put(tsk);
+ }
+ str_len += 1; /* for "\0" */
+ skb_put(buf, TLV_SPACE(str_len));
+ TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+
+ return buf;
+}
+
+/* tipc_sk_reinit: set non-zero address in all existing sockets
+ * when we go from standalone to network mode.
+ */
+void tipc_sk_reinit(void)
+{
+ struct tipc_msg *msg;
+ u32 ref = 0;
+ struct tipc_sock *tsk = tipc_sk_get_next(&ref);
+
+ for (; tsk; tsk = tipc_sk_get_next(&ref)) {
+ lock_sock(&tsk->sk);
+ msg = &tsk->phdr;
+ msg_set_prevnode(msg, tipc_own_addr);
+ msg_set_orignode(msg, tipc_own_addr);
+ release_sock(&tsk->sk);
+ tipc_sk_put(tsk);
+ }
+}
+
+/**
+ * struct reference - TIPC socket reference entry
+ * @tsk: pointer to socket associated with reference entry
+ * @ref: reference value for socket (combines instance & array index info)
+ */
+struct reference {
+ struct tipc_sock *tsk;
+ u32 ref;
+};
+
+/**
+ * struct tipc_ref_table - table of TIPC socket reference entries
+ * @entries: pointer to array of reference entries
+ * @capacity: array index of first unusable entry
+ * @init_point: array index of first uninitialized entry
+ * @first_free: array index of first unused socket reference entry
+ * @last_free: array index of last unused socket reference entry
+ * @index_mask: bitmask for array index portion of reference values
+ * @start_mask: initial value for instance value portion of reference values
+ */
+struct ref_table {
+ struct reference *entries;
+ u32 capacity;
+ u32 init_point;
+ u32 first_free;
+ u32 last_free;
+ u32 index_mask;
+ u32 start_mask;
+};
+
+/* Socket reference table consists of 2**N entries.
+ *
+ * State Socket ptr Reference
+ * ----- ---------- ---------
+ * In use non-NULL XXXX|own index
+ * (XXXX changes each time entry is acquired)
+ * Free NULL YYYY|next free index
+ * (YYYY is one more than last used XXXX)
+ * Uninitialized NULL 0
+ *
+ * Entry 0 is not used; this allows index 0 to denote the end of the free list.
+ *
+ * Note that a reference value of 0 does not necessarily indicate that an
+ * entry is uninitialized, since the last entry in the free list could also
+ * have a reference value of 0 (although this is unlikely).
+ */
+
+static struct ref_table tipc_ref_table;
+
+static DEFINE_RWLOCK(ref_table_lock);
+
+/**
+ * tipc_ref_table_init - create reference table for sockets
+ */
+int tipc_sk_ref_table_init(u32 req_sz, u32 start)
+{
+ struct reference *table;
+ u32 actual_sz;
+
+ /* account for unused entry, then round up size to a power of 2 */
+
+ req_sz++;
+ for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) {
+ /* do nothing */
+ };
+
+ /* allocate table & mark all entries as uninitialized */
+ table = vzalloc(actual_sz * sizeof(struct reference));
+ if (table == NULL)
+ return -ENOMEM;
+
+ tipc_ref_table.entries = table;
+ tipc_ref_table.capacity = req_sz;
+ tipc_ref_table.init_point = 1;
+ tipc_ref_table.first_free = 0;
+ tipc_ref_table.last_free = 0;
+ tipc_ref_table.index_mask = actual_sz - 1;
+ tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask;
+
+ return 0;
+}
+
+/**
+ * tipc_ref_table_stop - destroy reference table for sockets
+ */
+void tipc_sk_ref_table_stop(void)
+{
+ if (!tipc_ref_table.entries)
+ return;
+ vfree(tipc_ref_table.entries);
+ tipc_ref_table.entries = NULL;
+}
+
+/* tipc_ref_acquire - create reference to a socket
+ *
+ * Register an socket pointer in the reference table.
+ * Returns a unique reference value that is used from then on to retrieve the
+ * socket pointer, or to determine if the socket has been deregistered.
+ */
+u32 tipc_sk_ref_acquire(struct tipc_sock *tsk)
+{
+ u32 index;
+ u32 index_mask;
+ u32 next_plus_upper;
+ u32 ref = 0;
+ struct reference *entry;
+
+ if (unlikely(!tsk)) {
+ pr_err("Attempt to acquire ref. to non-existent obj\n");
+ return 0;
+ }
+ if (unlikely(!tipc_ref_table.entries)) {
+ pr_err("Ref. table not found in acquisition attempt\n");
+ return 0;
+ }
+
+ /* Take a free entry, if available; otherwise initialize a new one */
+ write_lock_bh(&ref_table_lock);
+ index = tipc_ref_table.first_free;
+ entry = &tipc_ref_table.entries[index];
+
+ if (likely(index)) {
+ index = tipc_ref_table.first_free;
+ entry = &tipc_ref_table.entries[index];
+ index_mask = tipc_ref_table.index_mask;
+ next_plus_upper = entry->ref;
+ tipc_ref_table.first_free = next_plus_upper & index_mask;
+ ref = (next_plus_upper & ~index_mask) + index;
+ entry->tsk = tsk;
+ } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
+ index = tipc_ref_table.init_point++;
+ entry = &tipc_ref_table.entries[index];
+ ref = tipc_ref_table.start_mask + index;
+ }
+
+ if (ref) {
+ entry->ref = ref;
+ entry->tsk = tsk;
+ }
+ write_unlock_bh(&ref_table_lock);
+ return ref;
+}
+
+/* tipc_sk_ref_discard - invalidate reference to an socket
+ *
+ * Disallow future references to an socket and free up the entry for re-use.
+ */
+void tipc_sk_ref_discard(u32 ref)
+{
+ struct reference *entry;
+ u32 index;
+ u32 index_mask;
+
+ if (unlikely(!tipc_ref_table.entries)) {
+ pr_err("Ref. table not found during discard attempt\n");
+ return;
+ }
+
+ index_mask = tipc_ref_table.index_mask;
+ index = ref & index_mask;
+ entry = &tipc_ref_table.entries[index];
+
+ write_lock_bh(&ref_table_lock);
+
+ if (unlikely(!entry->tsk)) {
+ pr_err("Attempt to discard ref. to non-existent socket\n");
+ goto exit;
+ }
+ if (unlikely(entry->ref != ref)) {
+ pr_err("Attempt to discard non-existent reference\n");
+ goto exit;
+ }
+
+ /* Mark entry as unused; increment instance part of entry's
+ * reference to invalidate any subsequent references
+ */
+
+ entry->tsk = NULL;
+ entry->ref = (ref & ~index_mask) + (index_mask + 1);
+
+ /* Append entry to free entry list */
+ if (unlikely(tipc_ref_table.first_free == 0))
+ tipc_ref_table.first_free = index;
+ else
+ tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index;
+ tipc_ref_table.last_free = index;
+exit:
+ write_unlock_bh(&ref_table_lock);
+}
+
+/* tipc_sk_get - find referenced socket and return pointer to it
+ */
+struct tipc_sock *tipc_sk_get(u32 ref)
+{
+ struct reference *entry;
+ struct tipc_sock *tsk;
+
+ if (unlikely(!tipc_ref_table.entries))
+ return NULL;
+ read_lock_bh(&ref_table_lock);
+ entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
+ tsk = entry->tsk;
+ if (likely(tsk && (entry->ref == ref)))
+ sock_hold(&tsk->sk);
+ else
+ tsk = NULL;
+ read_unlock_bh(&ref_table_lock);
+ return tsk;
+}
+
+/* tipc_sk_get_next - lock & return next socket after referenced one
+*/
+struct tipc_sock *tipc_sk_get_next(u32 *ref)
+{
+ struct reference *entry;
+ struct tipc_sock *tsk = NULL;
+ uint index = *ref & tipc_ref_table.index_mask;
+
+ read_lock_bh(&ref_table_lock);
+ while (++index < tipc_ref_table.capacity) {
+ entry = &tipc_ref_table.entries[index];
+ if (!entry->tsk)
+ continue;
+ tsk = entry->tsk;
+ sock_hold(&tsk->sk);
+ *ref = entry->ref;
+ break;
+ }
+ read_unlock_bh(&ref_table_lock);
+ return tsk;
+}
+
+static void tipc_sk_put(struct tipc_sock *tsk)
+{
+ sock_put(&tsk->sk);
+}
+
/**
* tipc_setsockopt - set socket option
* @sock: socket structure
@@ -1823,7 +2553,6 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
u32 value;
int res;
@@ -1841,16 +2570,16 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- tipc_port_set_importance(port, value);
+ res = tsk_set_importance(tsk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
- tipc_port_set_unreliable(port, value);
+ tsk_set_unreliable(tsk, value);
else
res = -ENOPROTOOPT;
break;
case TIPC_DEST_DROPPABLE:
- tipc_port_set_unreturnable(port, value);
+ tsk_set_unreturnable(tsk, value);
break;
case TIPC_CONN_TIMEOUT:
tipc_sk(sk)->conn_timeout = value;
@@ -1883,7 +2612,6 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
int len;
u32 value;
int res;
@@ -1900,16 +2628,16 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- value = tipc_port_importance(port);
+ value = tsk_importance(tsk);
break;
case TIPC_SRC_DROPPABLE:
- value = tipc_port_unreliable(port);
+ value = tsk_unreliable(tsk);
break;
case TIPC_DEST_DROPPABLE:
- value = tipc_port_unreturnable(port);
+ value = tsk_unreturnable(tsk);
break;
case TIPC_CONN_TIMEOUT:
- value = tipc_sk(sk)->conn_timeout;
+ value = tsk->conn_timeout;
/* no need to set "res", since already 0 at this point */
break;
case TIPC_NODE_RECVQ_DEPTH:
@@ -1936,7 +2664,7 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
return put_user(sizeof(value), ol);
}
-int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
+static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
{
struct tipc_sioc_ln_req lnr;
void __user *argp = (void __user *)arg;
@@ -1952,7 +2680,6 @@ int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
return 0;
}
return -EADDRNOTAVAIL;
- break;
default:
return -ENOIOCTLCMD;
}
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 3afcd2a70b31..baa43d03901e 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -35,40 +35,17 @@
#ifndef _TIPC_SOCK_H
#define _TIPC_SOCK_H
-#include "port.h"
#include <net/sock.h>
-/**
- * struct tipc_sock - TIPC socket structure
- * @sk: socket - interacts with 'port' and with user via the socket API
- * @port: port - interacts with 'sk' and with the rest of the TIPC stack
- * @peer_name: the peer of the connection, if any
- * @conn_timeout: the time we can wait for an unresponded setup request
- * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
- */
-
-struct tipc_sock {
- struct sock sk;
- struct tipc_port port;
- unsigned int conn_timeout;
- atomic_t dupl_rcvcnt;
-};
-
-static inline struct tipc_sock *tipc_sk(const struct sock *sk)
-{
- return container_of(sk, struct tipc_sock, sk);
-}
-
-static inline struct tipc_sock *tipc_port_to_sock(const struct tipc_port *port)
-{
- return container_of(port, struct tipc_sock, port);
-}
-
-static inline void tipc_sock_wakeup(struct tipc_sock *tsk)
-{
- tsk->sk.sk_write_space(&tsk->sk);
-}
-
+#define TIPC_CONNACK_INTV 256
+#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2)
+#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
+ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
int tipc_sk_rcv(struct sk_buff *buf);
+struct sk_buff *tipc_sk_socks_show(void);
+void tipc_sk_mcast_rcv(struct sk_buff *buf);
+void tipc_sk_reinit(void);
+int tipc_sk_ref_table_init(u32 requested_size, u32 start);
+void tipc_sk_ref_table_stop(void);
#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 642437231ad5..31b5cb232a43 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -36,7 +36,6 @@
#include "core.h"
#include "name_table.h"
-#include "port.h"
#include "subscr.h"
/**
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index f3fef93325a8..1a779b1e8510 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -47,6 +47,13 @@ static struct ctl_table tipc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "named_timeout",
+ .data = &sysctl_tipc_named_timeout,
+ .maxlen = sizeof(sysctl_tipc_named_timeout),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{}
};
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 9bc73f87f64a..99f7012b23b9 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -258,7 +258,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)
list_move_tail(&u->link, &gc_candidates);
}
-static bool gc_in_progress = false;
+static bool gc_in_progress;
#define UNIX_INFLIGHT_TRIGGER_GC 16000
void wait_for_unix_gc(void)
diff --git a/net/wimax/id-table.c b/net/wimax/id-table.c
index 72273abfcb16..a21508d11036 100644
--- a/net/wimax/id-table.c
+++ b/net/wimax/id-table.c
@@ -137,7 +137,7 @@ void wimax_id_table_release(void)
#endif
spin_lock(&wimax_id_table_lock);
list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) {
- printk(KERN_ERR "BUG: %s wimax_dev %p ifindex %d not cleared\n",
+ pr_err("BUG: %s wimax_dev %p ifindex %d not cleared\n",
__func__, wimax_dev, wimax_dev->net_dev->ifindex);
WARN_ON(1);
}
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
index c278b3356f75..54aa146930bd 100644
--- a/net/wimax/op-msg.c
+++ b/net/wimax/op-msg.c
@@ -189,7 +189,7 @@ const void *wimax_msg_data_len(struct sk_buff *msg, size_t *size)
nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr),
WIMAX_GNL_MSG_DATA);
if (nla == NULL) {
- printk(KERN_ERR "Cannot find attribute WIMAX_GNL_MSG_DATA\n");
+ pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
return NULL;
}
*size = nla_len(nla);
@@ -211,7 +211,7 @@ const void *wimax_msg_data(struct sk_buff *msg)
nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr),
WIMAX_GNL_MSG_DATA);
if (nla == NULL) {
- printk(KERN_ERR "Cannot find attribute WIMAX_GNL_MSG_DATA\n");
+ pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
return NULL;
}
return nla_data(nla);
@@ -232,7 +232,7 @@ ssize_t wimax_msg_len(struct sk_buff *msg)
nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr),
WIMAX_GNL_MSG_DATA);
if (nla == NULL) {
- printk(KERN_ERR "Cannot find attribute WIMAX_GNL_MSG_DATA\n");
+ pr_err("Cannot find attribute WIMAX_GNL_MSG_DATA\n");
return -EINVAL;
}
return nla_len(nla);
@@ -343,8 +343,7 @@ int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info)
d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
result = -ENODEV;
if (info->attrs[WIMAX_GNL_MSG_IFIDX] == NULL) {
- printk(KERN_ERR "WIMAX_GNL_MSG_FROM_USER: can't find IFIDX "
- "attribute\n");
+ pr_err("WIMAX_GNL_MSG_FROM_USER: can't find IFIDX attribute\n");
goto error_no_wimax_dev;
}
ifindex = nla_get_u32(info->attrs[WIMAX_GNL_MSG_IFIDX]);
diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c
index eb4580784d9d..a42079165e1f 100644
--- a/net/wimax/op-reset.c
+++ b/net/wimax/op-reset.c
@@ -107,8 +107,7 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
result = -ENODEV;
if (info->attrs[WIMAX_GNL_RESET_IFIDX] == NULL) {
- printk(KERN_ERR "WIMAX_GNL_OP_RFKILL: can't find IFIDX "
- "attribute\n");
+ pr_err("WIMAX_GNL_OP_RFKILL: can't find IFIDX attribute\n");
goto error_no_wimax_dev;
}
ifindex = nla_get_u32(info->attrs[WIMAX_GNL_RESET_IFIDX]);
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
index 403078d670a9..7d730543f243 100644
--- a/net/wimax/op-rfkill.c
+++ b/net/wimax/op-rfkill.c
@@ -421,8 +421,7 @@ int wimax_gnl_doit_rfkill(struct sk_buff *skb, struct genl_info *info)
d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
result = -ENODEV;
if (info->attrs[WIMAX_GNL_RFKILL_IFIDX] == NULL) {
- printk(KERN_ERR "WIMAX_GNL_OP_RFKILL: can't find IFIDX "
- "attribute\n");
+ pr_err("WIMAX_GNL_OP_RFKILL: can't find IFIDX attribute\n");
goto error_no_wimax_dev;
}
ifindex = nla_get_u32(info->attrs[WIMAX_GNL_RFKILL_IFIDX]);
diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c
index 995c08c827b5..e6788d281d0e 100644
--- a/net/wimax/op-state-get.c
+++ b/net/wimax/op-state-get.c
@@ -49,8 +49,7 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
result = -ENODEV;
if (info->attrs[WIMAX_GNL_STGET_IFIDX] == NULL) {
- printk(KERN_ERR "WIMAX_GNL_OP_STATE_GET: can't find IFIDX "
- "attribute\n");
+ pr_err("WIMAX_GNL_OP_STATE_GET: can't find IFIDX attribute\n");
goto error_no_wimax_dev;
}
ifindex = nla_get_u32(info->attrs[WIMAX_GNL_STGET_IFIDX]);
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index ec8b577db135..3f816e2971ee 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -191,8 +191,8 @@ void __check_new_state(enum wimax_st old_state, enum wimax_st new_state,
unsigned int allowed_states_bm)
{
if (WARN_ON(((1 << new_state) & allowed_states_bm) == 0)) {
- printk(KERN_ERR "SW BUG! Forbidden state change %u -> %u\n",
- old_state, new_state);
+ pr_err("SW BUG! Forbidden state change %u -> %u\n",
+ old_state, new_state);
}
}
@@ -602,8 +602,7 @@ int __init wimax_subsys_init(void)
wimax_gnl_ops,
wimax_gnl_mcgrps);
if (unlikely(result < 0)) {
- printk(KERN_ERR "cannot register generic netlink family: %d\n",
- result);
+ pr_err("cannot register generic netlink family: %d\n", result);
goto error_register_family;
}
diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h
index b445b82020a8..733c4bf8d4b3 100644
--- a/net/wimax/wimax-internal.h
+++ b/net/wimax/wimax-internal.h
@@ -30,6 +30,12 @@
#define __WIMAX_INTERNAL_H__
#ifdef __KERNEL__
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/device.h>
#include <net/wimax.h>
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 405f3c4cf70c..29c8675f9a11 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -162,6 +162,12 @@ config CFG80211_INTERNAL_REGDB
and includes code to query that database. This is an alternative
to using CRDA for defining regulatory rules for the kernel.
+ Using this option requires some parsing of the db.txt at build time,
+ the parser will be upkept with the latest wireless-regdb updates but
+ older wireless-regdb formats will be ignored. The parser may later
+ be replaced to avoid issues with conflicts on versions of
+ wireless-regdb.
+
For details see:
http://wireless.kernel.org/en/developers/Regulatory
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 992b34070bcb..72d81e2154d5 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -4,6 +4,7 @@
* any point in time.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*/
#include <linux/export.h>
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a1c40654dd9b..f52a4cd7017c 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -2,6 +2,7 @@
* This is the linux wireless configuration interface.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -25,7 +26,6 @@
#include "sysfs.h"
#include "debugfs.h"
#include "wext-compat.h"
-#include "ethtool.h"
#include "rdev-ops.h"
/* name for sysfs, %d is appended */
@@ -493,12 +493,6 @@ int wiphy_register(struct wiphy *wiphy)
int i;
u16 ifmodes = wiphy->interface_modes;
- /*
- * There are major locking problems in nl80211/mac80211 for CSA,
- * disable for all drivers until this has been reworked.
- */
- wiphy->flags &= ~WIPHY_FLAG_HAS_CHANNEL_SWITCH;
-
#ifdef CONFIG_PM
if (WARN_ON(wiphy->wowlan &&
(wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
@@ -636,6 +630,9 @@ int wiphy_register(struct wiphy *wiphy)
if (IS_ERR(rdev->wiphy.debugfsdir))
rdev->wiphy.debugfsdir = NULL;
+ cfg80211_debugfs_rdev_add(rdev);
+ nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+
if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
struct regulatory_request request;
@@ -647,8 +644,6 @@ int wiphy_register(struct wiphy *wiphy)
nl80211_send_reg_change_event(&request);
}
- cfg80211_debugfs_rdev_add(rdev);
-
rdev->wiphy.registered = true;
rtnl_unlock();
@@ -660,8 +655,6 @@ int wiphy_register(struct wiphy *wiphy)
return res;
}
- nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
-
return 0;
}
EXPORT_SYMBOL(wiphy_register);
@@ -927,8 +920,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
/* allow mac80211 to determine the timeout */
wdev->ps_timeout = -1;
- netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops);
-
if ((wdev->iftype == NL80211_IFTYPE_STATION ||
wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
@@ -1015,7 +1006,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
rdev->devlist_generation++;
cfg80211_mlme_purge_registrations(wdev);
#ifdef CONFIG_CFG80211_WEXT
- kfree(wdev->wext.keys);
+ kzfree(wdev->wext.keys);
#endif
}
/*
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index d4860bfc020e..e9e91298c70d 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -1,11 +1,9 @@
#include <linux/utsname.h>
#include <net/cfg80211.h>
#include "core.h"
-#include "ethtool.h"
#include "rdev-ops.h"
-static void cfg80211_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
+void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -23,84 +21,4 @@ static void cfg80211_get_drvinfo(struct net_device *dev,
strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)),
sizeof(info->bus_info));
}
-
-static int cfg80211_get_regs_len(struct net_device *dev)
-{
- /* For now, return 0... */
- return 0;
-}
-
-static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs,
- void *data)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
-
- regs->version = wdev->wiphy->hw_version;
- regs->len = 0;
-}
-
-static void cfg80211_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *rp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
-
- memset(rp, 0, sizeof(*rp));
-
- if (rdev->ops->get_ringparam)
- rdev_get_ringparam(rdev, &rp->tx_pending, &rp->tx_max_pending,
- &rp->rx_pending, &rp->rx_max_pending);
-}
-
-static int cfg80211_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *rp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
-
- if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0)
- return -EINVAL;
-
- if (rdev->ops->set_ringparam)
- return rdev_set_ringparam(rdev, rp->tx_pending, rp->rx_pending);
-
- return -ENOTSUPP;
-}
-
-static int cfg80211_get_sset_count(struct net_device *dev, int sset)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- if (rdev->ops->get_et_sset_count)
- return rdev_get_et_sset_count(rdev, dev, sset);
- return -EOPNOTSUPP;
-}
-
-static void cfg80211_get_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- if (rdev->ops->get_et_stats)
- rdev_get_et_stats(rdev, dev, stats, data);
-}
-
-static void cfg80211_get_strings(struct net_device *dev, u32 sset, u8 *data)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- if (rdev->ops->get_et_strings)
- rdev_get_et_strings(rdev, dev, sset, data);
-}
-
-const struct ethtool_ops cfg80211_ethtool_ops = {
- .get_drvinfo = cfg80211_get_drvinfo,
- .get_regs_len = cfg80211_get_regs_len,
- .get_regs = cfg80211_get_regs,
- .get_link = ethtool_op_get_link,
- .get_ringparam = cfg80211_get_ringparam,
- .set_ringparam = cfg80211_set_ringparam,
- .get_strings = cfg80211_get_strings,
- .get_ethtool_stats = cfg80211_get_stats,
- .get_sset_count = cfg80211_get_sset_count,
-};
+EXPORT_SYMBOL(cfg80211_get_drvinfo);
diff --git a/net/wireless/ethtool.h b/net/wireless/ethtool.h
deleted file mode 100644
index 695ecad20bd6..000000000000
--- a/net/wireless/ethtool.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __CFG80211_ETHTOOL__
-#define __CFG80211_ETHTOOL__
-
-extern const struct ethtool_ops cfg80211_ethtool_ops;
-
-#endif /* __CFG80211_ETHTOOL__ */
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index 40c37fc5b67c..baf2426b555a 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk
@@ -51,32 +51,41 @@ function parse_country_head() {
function parse_reg_rule()
{
+ flag_starts_at = 7
+
start = $1
sub(/\(/, "", start)
end = $3
bw = $5
sub(/\),/, "", bw)
- gain = $6
- sub(/\(/, "", gain)
- sub(/,/, "", gain)
- power = $7
- sub(/\)/, "", power)
- sub(/,/, "", power)
+ gain = 0
+ power = $6
# power might be in mW...
- units = $8
+ units = $7
+ dfs_cac = 0
+
+ sub(/\(/, "", power)
+ sub(/\),/, "", power)
+ sub(/\),/, "", units)
sub(/\)/, "", units)
- sub(/,/, "", units)
- dfs_cac = $9
+
if (units == "mW") {
+ flag_starts_at = 8
power = 10 * log(power)/log(10)
+ if ($8 ~ /[[:digit:]]/) {
+ flag_starts_at = 9
+ dfs_cac = $8
+ }
} else {
- dfs_cac = $8
+ if ($7 ~ /[[:digit:]]/) {
+ flag_starts_at = 8
+ dfs_cac = $7
+ }
}
- sub(/,/, "", dfs_cac)
sub(/\(/, "", dfs_cac)
- sub(/\)/, "", dfs_cac)
+ sub(/\),/, "", dfs_cac)
flagstr = ""
- for (i=8; i<=NF; i++)
+ for (i=flag_starts_at; i<=NF; i++)
flagstr = flagstr $i
split(flagstr, flagarray, ",")
flags = ""
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 8f345da3ea5f..e24fc585c883 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -115,7 +115,7 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
}
if (WARN_ON(wdev->connect_keys))
- kfree(wdev->connect_keys);
+ kzfree(wdev->connect_keys);
wdev->connect_keys = connkeys;
wdev->ibss_fixed = params->channel_fixed;
@@ -161,7 +161,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
ASSERT_WDEV_LOCK(wdev);
- kfree(wdev->connect_keys);
+ kzfree(wdev->connect_keys);
wdev->connect_keys = NULL;
rdev_set_qos_map(rdev, dev, NULL);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 266766b8d80b..2c52b59e43f3 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -19,7 +19,7 @@
void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
- const u8 *buf, size_t len)
+ const u8 *buf, size_t len, int uapsd_queues)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -43,7 +43,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
return;
}
- nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL);
+ nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues);
/* update current_bss etc., consumes the bss reference */
__cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs,
status_code,
@@ -605,7 +605,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
}
bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
- const u8 *buf, size_t len, u32 flags, gfp_t gfp)
+ const u8 *buf, size_t len, u32 flags)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -648,7 +648,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
/* Indicate the received Action frame to user space */
if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
freq, sig_mbm,
- buf, len, flags, gfp))
+ buf, len, flags, GFP_ATOMIC))
continue;
result = true;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6668daf69326..cb9f5a44ffad 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2,6 +2,7 @@
* This is the new netlink-based wireless configuration interface.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*/
#include <linux/if.h>
@@ -225,6 +226,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 },
+ [NL80211_ATTR_WIPHY_DYN_ACK] = { .type = NLA_FLAG },
[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
@@ -337,6 +339,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 },
[NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG },
[NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG },
+ [NL80211_ATTR_TDLS_INITIATOR] = { .type = NLA_FLAG },
[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
@@ -387,6 +390,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
+ [NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG },
+ [NL80211_ATTR_TSID] = { .type = NLA_U8 },
+ [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 },
+ [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
+ [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
};
/* policy for the key attributes */
@@ -1506,6 +1514,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
CMD(channel_switch, CHANNEL_SWITCH);
CMD(set_qos_map, SET_QOS_MAP);
+ if (rdev->wiphy.flags &
+ WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)
+ CMD(add_tx_ts, ADD_TX_TS);
}
/* add into the if now */
#undef CMD
@@ -2236,11 +2247,21 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) {
+ if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK])
+ return -EINVAL;
+
coverage_class = nla_get_u8(
info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]);
changed |= WIPHY_PARAM_COVERAGE_CLASS;
}
+ if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) {
+ if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION))
+ return -EOPNOTSUPP;
+
+ changed |= WIPHY_PARAM_DYN_ACK;
+ }
+
if (changed) {
u8 old_retry_short, old_retry_long;
u32 old_frag_threshold, old_rts_threshold;
@@ -3325,6 +3346,29 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
return PTR_ERR(params.acl);
}
+ if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
+ params.smps_mode =
+ nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
+ switch (params.smps_mode) {
+ case NL80211_SMPS_OFF:
+ break;
+ case NL80211_SMPS_STATIC:
+ if (!(rdev->wiphy.features &
+ NL80211_FEATURE_STATIC_SMPS))
+ return -EINVAL;
+ break;
+ case NL80211_SMPS_DYNAMIC:
+ if (!(rdev->wiphy.features &
+ NL80211_FEATURE_DYNAMIC_SMPS))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ params.smps_mode = NL80211_SMPS_OFF;
+ }
+
wdev_lock(wdev);
err = rdev_start_ap(rdev, dev, &params);
if (!err) {
@@ -3813,7 +3857,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
{
if (params->listen_interval != -1)
return -EINVAL;
- if (params->aid)
+ if (params->aid &&
+ !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
return -EINVAL;
/* When you run into this, adjust the code below for the new flag */
@@ -6011,17 +6056,6 @@ skip_beacons:
params.radar_required = true;
}
- /* TODO: I left this here for now. With channel switch, the
- * verification is a bit more complicated, because we only do
- * it later when the channel switch really happens.
- */
- err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
- params.chandef.chan,
- CHAN_MODE_SHARED,
- radar_detect_width);
- if (err)
- return err;
-
if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
params.block_tx = true;
@@ -6042,7 +6076,6 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
const struct cfg80211_bss_ies *ies;
void *hdr;
struct nlattr *bss;
- bool tsf = false;
ASSERT_WDEV_LOCK(wdev);
@@ -6069,18 +6102,27 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
goto nla_put_failure;
rcu_read_lock();
+ /* indicate whether we have probe response data or not */
+ if (rcu_access_pointer(res->proberesp_ies) &&
+ nla_put_flag(msg, NL80211_BSS_PRESP_DATA))
+ goto fail_unlock_rcu;
+
+ /* this pointer prefers to be pointed to probe response data
+ * but is always valid
+ */
ies = rcu_dereference(res->ies);
if (ies) {
if (nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf))
goto fail_unlock_rcu;
- tsf = true;
if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS,
ies->len, ies->data))
goto fail_unlock_rcu;
}
+
+ /* and this pointer is always (unless driver didn't know) beacon data */
ies = rcu_dereference(res->beacon_ies);
- if (ies) {
- if (!tsf && nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf))
+ if (ies && ies->from_beacon) {
+ if (nla_put_u64(msg, NL80211_BSS_BEACON_TSF, ies->tsf))
goto fail_unlock_rcu;
if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES,
ies->len, ies->data))
@@ -6584,6 +6626,14 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
sizeof(req.vht_capa));
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
+ if (!(rdev->wiphy.features &
+ NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) ||
+ !(rdev->wiphy.features & NL80211_FEATURE_QUIET))
+ return -EINVAL;
+ req.flags |= ASSOC_REQ_USE_RRM;
+ }
+
err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
if (!err) {
wdev_lock(dev->ieee80211_ptr);
@@ -6846,7 +6896,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
if (err)
- kfree(connkeys);
+ kzfree(connkeys);
return err;
}
@@ -6978,6 +7028,9 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp)
struct nlattr *data = ((void **)skb->cb)[2];
enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE;
+ /* clear CB data for netlink core to own from now on */
+ memset(skb->cb, 0, sizeof(skb->cb));
+
nla_nest_end(skb, data);
genlmsg_end(skb, hdr);
@@ -7215,7 +7268,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) {
- kfree(connkeys);
+ kzfree(connkeys);
return -EINVAL;
}
memcpy(&connect.ht_capa,
@@ -7233,7 +7286,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) {
- kfree(connkeys);
+ kzfree(connkeys);
return -EINVAL;
}
memcpy(&connect.vht_capa,
@@ -7241,11 +7294,19 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
sizeof(connect.vht_capa));
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
+ if (!(rdev->wiphy.features &
+ NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) ||
+ !(rdev->wiphy.features & NL80211_FEATURE_QUIET))
+ return -EINVAL;
+ connect.flags |= ASSOC_REQ_USE_RRM;
+ }
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL);
wdev_unlock(dev->ieee80211_ptr);
if (err)
- kfree(connkeys);
+ kzfree(connkeys);
return err;
}
@@ -7364,6 +7425,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
u32 peer_capability = 0;
u16 status_code;
u8 *peer;
+ bool initiator;
if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) ||
!rdev->ops->tdls_mgmt)
@@ -7380,12 +7442,14 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]);
status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]);
+ initiator = nla_get_flag(info->attrs[NL80211_ATTR_TDLS_INITIATOR]);
if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY])
peer_capability =
nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]);
return rdev_tdls_mgmt(rdev, dev, peer, action_code,
dialog_token, status_code, peer_capability,
+ initiator,
nla_data(info->attrs[NL80211_ATTR_IE]),
nla_len(info->attrs[NL80211_ATTR_IE]));
}
@@ -8928,13 +8992,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN)
return -ERANGE;
- memcpy(rekey_data.kek, nla_data(tb[NL80211_REKEY_DATA_KEK]),
- NL80211_KEK_LEN);
- memcpy(rekey_data.kck, nla_data(tb[NL80211_REKEY_DATA_KCK]),
- NL80211_KCK_LEN);
- memcpy(rekey_data.replay_ctr,
- nla_data(tb[NL80211_REKEY_DATA_REPLAY_CTR]),
- NL80211_REPLAY_CTR_LEN);
+ rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]);
+ rekey_data.kck = nla_data(tb[NL80211_REKEY_DATA_KCK]);
+ rekey_data.replay_ctr = nla_data(tb[NL80211_REKEY_DATA_REPLAY_CTR]);
wdev_lock(wdev);
if (!wdev->current_bss) {
@@ -9300,6 +9360,9 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb)
void *hdr = ((void **)skb->cb)[1];
struct nlattr *data = ((void **)skb->cb)[2];
+ /* clear CB data for netlink core to own from now on */
+ memset(skb->cb, 0, sizeof(skb->cb));
+
if (WARN_ON(!rdev->cur_cmd_info)) {
kfree_skb(skb);
return -EINVAL;
@@ -9363,6 +9426,93 @@ static int nl80211_set_qos_map(struct sk_buff *skb,
return ret;
}
+static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ const u8 *peer;
+ u8 tsid, up;
+ u16 admitted_time = 0;
+ int err;
+
+ if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION))
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] ||
+ !info->attrs[NL80211_ATTR_USER_PRIO])
+ return -EINVAL;
+
+ tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
+ if (tsid >= IEEE80211_NUM_TIDS)
+ return -EINVAL;
+
+ up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]);
+ if (up >= IEEE80211_NUM_UPS)
+ return -EINVAL;
+
+ /* WMM uses TIDs 0-7 even for TSPEC */
+ if (tsid < IEEE80211_FIRST_TSPEC_TSID) {
+ if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION))
+ return -EINVAL;
+ } else {
+ /* TODO: handle 802.11 TSPEC/admission control
+ * need more attributes for that (e.g. BA session requirement)
+ */
+ return -EINVAL;
+ }
+
+ peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+ if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) {
+ admitted_time =
+ nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]);
+ if (!admitted_time)
+ return -EINVAL;
+ }
+
+ wdev_lock(wdev);
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ if (wdev->current_bss)
+ break;
+ err = -ENOTCONN;
+ goto out;
+ default:
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time);
+
+ out:
+ wdev_unlock(wdev);
+ return err;
+}
+
+static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ const u8 *peer;
+ u8 tsid;
+ int err;
+
+ if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC])
+ return -EINVAL;
+
+ tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
+ peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+ wdev_lock(wdev);
+ err = rdev_del_tx_ts(rdev, dev, tsid, peer);
+ wdev_unlock(wdev);
+
+ return err;
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -9373,6 +9523,7 @@ static int nl80211_set_qos_map(struct sk_buff *skb,
/* If a netdev is associated, it must be UP, P2P must be started */
#define NL80211_FLAG_NEED_WDEV_UP (NL80211_FLAG_NEED_WDEV |\
NL80211_FLAG_CHECK_NETDEV_UP)
+#define NL80211_FLAG_CLEAR_SKB 0x20
static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
struct genl_info *info)
@@ -9456,8 +9607,20 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
dev_put(info->user_ptr[1]);
}
}
+
if (ops->internal_flags & NL80211_FLAG_NEED_RTNL)
rtnl_unlock();
+
+ /* If needed, clear the netlink message payload from the SKB
+ * as it might contain key data that shouldn't stick around on
+ * the heap after the SKB is freed. The netlink message header
+ * is still needed for further processing, so leave it intact.
+ */
+ if (ops->internal_flags & NL80211_FLAG_CLEAR_SKB) {
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+
+ memset(nlmsg_data(nlh), 0, nlmsg_len(nlh));
+ }
}
static const struct genl_ops nl80211_ops[] = {
@@ -9525,7 +9688,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_NEW_KEY,
@@ -9533,7 +9697,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_KEY,
@@ -9711,7 +9876,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_ASSOCIATE,
@@ -9945,7 +10111,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_TDLS_MGMT,
@@ -10103,6 +10270,22 @@ static const struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_ADD_TX_TS,
+ .doit = nl80211_add_tx_ts,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_DEL_TX_TS,
+ .doit = nl80211_del_tx_ts,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
/* notification functions */
@@ -10371,7 +10554,8 @@ nla_put_failure:
static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *buf, size_t len,
- enum nl80211_commands cmd, gfp_t gfp)
+ enum nl80211_commands cmd, gfp_t gfp,
+ int uapsd_queues)
{
struct sk_buff *msg;
void *hdr;
@@ -10391,6 +10575,19 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
nla_put(msg, NL80211_ATTR_FRAME, len, buf))
goto nla_put_failure;
+ if (uapsd_queues >= 0) {
+ struct nlattr *nla_wmm =
+ nla_nest_start(msg, NL80211_ATTR_STA_WME);
+ if (!nla_wmm)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_STA_WME_UAPSD_QUEUES,
+ uapsd_queues))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nla_wmm);
+ }
+
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
@@ -10407,15 +10604,15 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
size_t len, gfp_t gfp)
{
nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_AUTHENTICATE, gfp);
+ NL80211_CMD_AUTHENTICATE, gfp, -1);
}
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *buf,
- size_t len, gfp_t gfp)
+ size_t len, gfp_t gfp, int uapsd_queues)
{
nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_ASSOCIATE, gfp);
+ NL80211_CMD_ASSOCIATE, gfp, uapsd_queues);
}
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
@@ -10423,7 +10620,7 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
size_t len, gfp_t gfp)
{
nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_DEAUTHENTICATE, gfp);
+ NL80211_CMD_DEAUTHENTICATE, gfp, -1);
}
void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
@@ -10431,7 +10628,7 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
size_t len, gfp_t gfp)
{
nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_DISASSOCIATE, gfp);
+ NL80211_CMD_DISASSOCIATE, gfp, -1);
}
void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
@@ -10452,7 +10649,7 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
- nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC);
+ nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1);
}
EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 49c9a482dd12..7ad70d6f0cc6 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -23,7 +23,8 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
const u8 *buf, size_t len, gfp_t gfp);
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- const u8 *buf, size_t len, gfp_t gfp);
+ const u8 *buf, size_t len, gfp_t gfp,
+ int uapsd_queues);
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *buf, size_t len, gfp_t gfp);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index d95bbe348138..f6d457d6a558 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -714,25 +714,6 @@ static inline int rdev_get_antenna(struct cfg80211_registered_device *rdev,
return ret;
}
-static inline int rdev_set_ringparam(struct cfg80211_registered_device *rdev,
- u32 tx, u32 rx)
-{
- int ret;
- trace_rdev_set_ringparam(&rdev->wiphy, tx, rx);
- ret = rdev->ops->set_ringparam(&rdev->wiphy, tx, rx);
- trace_rdev_return_int(&rdev->wiphy, ret);
- return ret;
-}
-
-static inline void rdev_get_ringparam(struct cfg80211_registered_device *rdev,
- u32 *tx, u32 *tx_max, u32 *rx,
- u32 *rx_max)
-{
- trace_rdev_get_ringparam(&rdev->wiphy);
- rdev->ops->get_ringparam(&rdev->wiphy, tx, tx_max, rx, rx_max);
- trace_rdev_return_void_tx_rx(&rdev->wiphy, *tx, *tx_max, *rx, *rx_max);
-}
-
static inline int
rdev_sched_scan_start(struct cfg80211_registered_device *rdev,
struct net_device *dev,
@@ -770,15 +751,15 @@ static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev,
struct net_device *dev, u8 *peer,
u8 action_code, u8 dialog_token,
u16 status_code, u32 peer_capability,
- const u8 *buf, size_t len)
+ bool initiator, const u8 *buf, size_t len)
{
int ret;
trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
dialog_token, status_code, peer_capability,
- buf, len);
+ initiator, buf, len);
ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
dialog_token, status_code, peer_capability,
- buf, len);
+ initiator, buf, len);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -816,35 +797,6 @@ static inline int rdev_set_noack_map(struct cfg80211_registered_device *rdev,
}
static inline int
-rdev_get_et_sset_count(struct cfg80211_registered_device *rdev,
- struct net_device *dev, int sset)
-{
- int ret;
- trace_rdev_get_et_sset_count(&rdev->wiphy, dev, sset);
- ret = rdev->ops->get_et_sset_count(&rdev->wiphy, dev, sset);
- trace_rdev_return_int(&rdev->wiphy, ret);
- return ret;
-}
-
-static inline void rdev_get_et_stats(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct ethtool_stats *stats, u64 *data)
-{
- trace_rdev_get_et_stats(&rdev->wiphy, dev);
- rdev->ops->get_et_stats(&rdev->wiphy, dev, stats, data);
- trace_rdev_return_void(&rdev->wiphy);
-}
-
-static inline void rdev_get_et_strings(struct cfg80211_registered_device *rdev,
- struct net_device *dev, u32 sset,
- u8 *data)
-{
- trace_rdev_get_et_strings(&rdev->wiphy, dev, sset);
- rdev->ops->get_et_strings(&rdev->wiphy, dev, sset, data);
- trace_rdev_return_void(&rdev->wiphy);
-}
-
-static inline int
rdev_get_channel(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
struct cfg80211_chan_def *chandef)
@@ -963,4 +915,35 @@ rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int
+rdev_add_tx_ts(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, u8 tsid, const u8 *peer,
+ u8 user_prio, u16 admitted_time)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_add_tx_ts(&rdev->wiphy, dev, tsid, peer,
+ user_prio, admitted_time);
+ if (rdev->ops->add_tx_ts)
+ ret = rdev->ops->add_tx_ts(&rdev->wiphy, dev, tsid, peer,
+ user_prio, admitted_time);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+
+ return ret;
+}
+
+static inline int
+rdev_del_tx_ts(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, u8 tsid, const u8 *peer)
+{
+ int ret = -EOPNOTSUPP;
+
+ trace_rdev_del_tx_ts(&rdev->wiphy, dev, tsid, peer);
+ if (rdev->ops->del_tx_ts)
+ ret = rdev->ops->del_tx_ts(&rdev->wiphy, dev, tsid, peer);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1afdf45db38f..b725a31a4751 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3,6 +3,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -798,6 +799,57 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
return 0;
}
+/* check whether old rule contains new rule */
+static bool rule_contains(struct ieee80211_reg_rule *r1,
+ struct ieee80211_reg_rule *r2)
+{
+ /* for simplicity, currently consider only same flags */
+ if (r1->flags != r2->flags)
+ return false;
+
+ /* verify r1 is more restrictive */
+ if ((r1->power_rule.max_antenna_gain >
+ r2->power_rule.max_antenna_gain) ||
+ r1->power_rule.max_eirp > r2->power_rule.max_eirp)
+ return false;
+
+ /* make sure r2's range is contained within r1 */
+ if (r1->freq_range.start_freq_khz > r2->freq_range.start_freq_khz ||
+ r1->freq_range.end_freq_khz < r2->freq_range.end_freq_khz)
+ return false;
+
+ /* and finally verify that r1.max_bw >= r2.max_bw */
+ if (r1->freq_range.max_bandwidth_khz <
+ r2->freq_range.max_bandwidth_khz)
+ return false;
+
+ return true;
+}
+
+/* add or extend current rules. do nothing if rule is already contained */
+static void add_rule(struct ieee80211_reg_rule *rule,
+ struct ieee80211_reg_rule *reg_rules, u32 *n_rules)
+{
+ struct ieee80211_reg_rule *tmp_rule;
+ int i;
+
+ for (i = 0; i < *n_rules; i++) {
+ tmp_rule = &reg_rules[i];
+ /* rule is already contained - do nothing */
+ if (rule_contains(tmp_rule, rule))
+ return;
+
+ /* extend rule if possible */
+ if (rule_contains(rule, tmp_rule)) {
+ memcpy(tmp_rule, rule, sizeof(*rule));
+ return;
+ }
+ }
+
+ memcpy(&reg_rules[*n_rules], rule, sizeof(*rule));
+ (*n_rules)++;
+}
+
/**
* regdom_intersect - do the intersection between two regulatory domains
* @rd1: first regulatory domain
@@ -817,12 +869,10 @@ regdom_intersect(const struct ieee80211_regdomain *rd1,
{
int r, size_of_regd;
unsigned int x, y;
- unsigned int num_rules = 0, rule_idx = 0;
+ unsigned int num_rules = 0;
const struct ieee80211_reg_rule *rule1, *rule2;
- struct ieee80211_reg_rule *intersected_rule;
+ struct ieee80211_reg_rule intersected_rule;
struct ieee80211_regdomain *rd;
- /* This is just a dummy holder to help us count */
- struct ieee80211_reg_rule dummy_rule;
if (!rd1 || !rd2)
return NULL;
@@ -840,7 +890,7 @@ regdom_intersect(const struct ieee80211_regdomain *rd1,
for (y = 0; y < rd2->n_reg_rules; y++) {
rule2 = &rd2->reg_rules[y];
if (!reg_rules_intersect(rd1, rd2, rule1, rule2,
- &dummy_rule))
+ &intersected_rule))
num_rules++;
}
}
@@ -855,34 +905,24 @@ regdom_intersect(const struct ieee80211_regdomain *rd1,
if (!rd)
return NULL;
- for (x = 0; x < rd1->n_reg_rules && rule_idx < num_rules; x++) {
+ for (x = 0; x < rd1->n_reg_rules; x++) {
rule1 = &rd1->reg_rules[x];
- for (y = 0; y < rd2->n_reg_rules && rule_idx < num_rules; y++) {
+ for (y = 0; y < rd2->n_reg_rules; y++) {
rule2 = &rd2->reg_rules[y];
- /*
- * This time around instead of using the stack lets
- * write to the target rule directly saving ourselves
- * a memcpy()
- */
- intersected_rule = &rd->reg_rules[rule_idx];
r = reg_rules_intersect(rd1, rd2, rule1, rule2,
- intersected_rule);
+ &intersected_rule);
/*
* No need to memset here the intersected rule here as
* we're not using the stack anymore
*/
if (r)
continue;
- rule_idx++;
- }
- }
- if (rule_idx != num_rules) {
- kfree(rd);
- return NULL;
+ add_rule(&intersected_rule, rd->reg_rules,
+ &rd->n_reg_rules);
+ }
}
- rd->n_reg_rules = num_rules;
rd->alpha2[0] = '9';
rd->alpha2[1] = '8';
rd->dfs_region = reg_intersect_dfs_region(rd1->dfs_region,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0798c62e6085..bda39f149810 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -2,6 +2,7 @@
* cfg80211 scan result handling
*
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -884,6 +885,7 @@ struct cfg80211_bss*
cfg80211_inform_bss_width(struct wiphy *wiphy,
struct ieee80211_channel *rx_channel,
enum nl80211_bss_scan_width scan_width,
+ enum cfg80211_bss_frame_type ftype,
const u8 *bssid, u64 tsf, u16 capability,
u16 beacon_interval, const u8 *ie, size_t ielen,
s32 signal, gfp_t gfp)
@@ -911,21 +913,32 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
tmp.pub.beacon_interval = beacon_interval;
tmp.pub.capability = capability;
/*
- * Since we do not know here whether the IEs are from a Beacon or Probe
+ * If we do not know here whether the IEs are from a Beacon or Probe
* Response frame, we need to pick one of the options and only use it
* with the driver that does not provide the full Beacon/Probe Response
* frame. Use Beacon frame pointer to avoid indicating that this should
* override the IEs pointer should we have received an earlier
* indication of Probe Response data.
*/
- ies = kmalloc(sizeof(*ies) + ielen, gfp);
+ ies = kzalloc(sizeof(*ies) + ielen, gfp);
if (!ies)
return NULL;
ies->len = ielen;
ies->tsf = tsf;
+ ies->from_beacon = false;
memcpy(ies->data, ie, ielen);
- rcu_assign_pointer(tmp.pub.beacon_ies, ies);
+ switch (ftype) {
+ case CFG80211_BSS_FTYPE_BEACON:
+ ies->from_beacon = true;
+ /* fall through to assign */
+ case CFG80211_BSS_FTYPE_UNKNOWN:
+ rcu_assign_pointer(tmp.pub.beacon_ies, ies);
+ break;
+ case CFG80211_BSS_FTYPE_PRESP:
+ rcu_assign_pointer(tmp.pub.proberesp_ies, ies);
+ break;
+ }
rcu_assign_pointer(tmp.pub.ies, ies);
signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
@@ -982,11 +995,12 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
if (!channel)
return NULL;
- ies = kmalloc(sizeof(*ies) + ielen, gfp);
+ ies = kzalloc(sizeof(*ies) + ielen, gfp);
if (!ies)
return NULL;
ies->len = ielen;
ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
+ ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control);
memcpy(ies->data, mgmt->u.probe_resp.variable, ielen);
if (ieee80211_is_probe_resp(mgmt->frame_control))
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 8bbeeb302216..dc1668ff543b 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -641,7 +641,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
}
if (status != WLAN_STATUS_SUCCESS) {
- kfree(wdev->connect_keys);
+ kzfree(wdev->connect_keys);
wdev->connect_keys = NULL;
wdev->ssid_len = 0;
if (bss) {
@@ -918,7 +918,7 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
if (WARN_ON(wdev->connect_keys)) {
- kfree(wdev->connect_keys);
+ kzfree(wdev->connect_keys);
wdev->connect_keys = NULL;
}
@@ -978,7 +978,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
- kfree(wdev->connect_keys);
+ kzfree(wdev->connect_keys);
wdev->connect_keys = NULL;
if (wdev->conn)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 7cc887f9da11..625a6e6d1168 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -298,11 +298,6 @@ DEFINE_EVENT(wiphy_only_evt, rdev_return_void,
TP_ARGS(wiphy)
);
-DEFINE_EVENT(wiphy_only_evt, rdev_get_ringparam,
- TP_PROTO(struct wiphy *wiphy),
- TP_ARGS(wiphy)
-);
-
DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna,
TP_PROTO(struct wiphy *wiphy),
TP_ARGS(wiphy)
@@ -580,11 +575,6 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_stop_ap,
TP_ARGS(wiphy, netdev)
);
-DEFINE_EVENT(wiphy_netdev_evt, rdev_get_et_stats,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
- TP_ARGS(wiphy, netdev)
-);
-
DEFINE_EVENT(wiphy_netdev_evt, rdev_sched_scan_stop,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
TP_ARGS(wiphy, netdev)
@@ -1439,11 +1429,6 @@ DECLARE_EVENT_CLASS(tx_rx_evt,
WIPHY_PR_ARG, __entry->tx, __entry->rx)
);
-DEFINE_EVENT(tx_rx_evt, rdev_set_ringparam,
- TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, rx, tx)
-);
-
DEFINE_EVENT(tx_rx_evt, rdev_set_antenna,
TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
TP_ARGS(wiphy, rx, tx)
@@ -1469,9 +1454,9 @@ TRACE_EVENT(rdev_tdls_mgmt,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
u8 *peer, u8 action_code, u8 dialog_token,
u16 status_code, u32 peer_capability,
- const u8 *buf, size_t len),
+ bool initiator, const u8 *buf, size_t len),
TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code,
- peer_capability, buf, len),
+ peer_capability, initiator, buf, len),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
@@ -1480,6 +1465,7 @@ TRACE_EVENT(rdev_tdls_mgmt,
__field(u8, dialog_token)
__field(u16, status_code)
__field(u32, peer_capability)
+ __field(bool, initiator)
__dynamic_array(u8, buf, len)
),
TP_fast_assign(
@@ -1490,13 +1476,16 @@ TRACE_EVENT(rdev_tdls_mgmt,
__entry->dialog_token = dialog_token;
__entry->status_code = status_code;
__entry->peer_capability = peer_capability;
+ __entry->initiator = initiator;
memcpy(__get_dynamic_array(buf), buf, len);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, "
- "dialog_token: %u, status_code: %u, peer_capability: %u buf: %#.2x ",
+ "dialog_token: %u, status_code: %u, peer_capability: %u "
+ "initiator: %s buf: %#.2x ",
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
__entry->action_code, __entry->dialog_token,
__entry->status_code, __entry->peer_capability,
+ BOOL_TO_STR(__entry->initiator),
((u8 *)__get_dynamic_array(buf))[0])
);
@@ -1725,40 +1714,6 @@ TRACE_EVENT(rdev_set_noack_map,
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map)
);
-TRACE_EVENT(rdev_get_et_sset_count,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int sset),
- TP_ARGS(wiphy, netdev, sset),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- __field(int, sset)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- __entry->sset = sset;
- ),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", sset: %d",
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sset)
-);
-
-TRACE_EVENT(rdev_get_et_strings,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 sset),
- TP_ARGS(wiphy, netdev, sset),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- __field(u32, sset)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- __entry->sset = sset;
- ),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", sset: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sset)
-);
-
DEFINE_EVENT(wiphy_wdev_evt, rdev_get_channel,
TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
TP_ARGS(wiphy, wdev)
@@ -1941,6 +1896,51 @@ TRACE_EVENT(rdev_set_ap_chanwidth,
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
);
+TRACE_EVENT(rdev_add_tx_ts,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time),
+ TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(peer)
+ __field(u8, tsid)
+ __field(u8, user_prio)
+ __field(u16, admitted_time)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(peer, peer);
+ __entry->tsid = tsid;
+ __entry->user_prio = user_prio;
+ __entry->admitted_time = admitted_time;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d, UP %d, time %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+ __entry->tsid, __entry->user_prio, __entry->admitted_time)
+);
+
+TRACE_EVENT(rdev_del_tx_ts,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ u8 tsid, const u8 *peer),
+ TP_ARGS(wiphy, netdev, tsid, peer),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(peer)
+ __field(u8, tsid)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(peer, peer);
+ __entry->tsid = tsid;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 728f1c0dc70d..5e233a577d0f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -2,6 +2,7 @@
* Wireless utility functions
*
* Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
*/
#include <linux/export.h>
#include <linux/bitops.h>
@@ -796,7 +797,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
netdev_err(dev, "failed to set mgtdef %d\n", i);
}
- kfree(wdev->connect_keys);
+ kzfree(wdev->connect_keys);
wdev->connect_keys = NULL;
}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 11120bb14162..0f47948c572f 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -496,6 +496,8 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
err = 0;
if (!err) {
if (!addr) {
+ memset(wdev->wext.keys->data[idx], 0,
+ sizeof(wdev->wext.keys->data[idx]));
wdev->wext.keys->params[idx].key_len = 0;
wdev->wext.keys->params[idx].cipher = 0;
}
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c7e5c8eb4f24..368611c05739 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -57,7 +57,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
err = cfg80211_connect(rdev, wdev->netdev,
&wdev->wext.connect, ck, prev_bssid);
if (err)
- kfree(ck);
+ kzfree(ck);
return err;
}
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 0622d319e1f2..666c5ffe929d 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -3,6 +3,7 @@
#include <linux/xfrm.h>
#include <linux/socket.h>
+#include <linux/jhash.h>
static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr)
{
@@ -28,6 +29,58 @@ static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr,
saddr->a6[2] ^ saddr->a6[3]);
}
+static inline u32 __bits2mask32(__u8 bits)
+{
+ u32 mask32 = 0xffffffff;
+
+ if (bits == 0)
+ mask32 = 0;
+ else if (bits < 32)
+ mask32 <<= (32 - bits);
+
+ return mask32;
+}
+
+static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr,
+ const xfrm_address_t *saddr,
+ __u8 dbits,
+ __u8 sbits)
+{
+ return jhash_2words(ntohl(daddr->a4) & __bits2mask32(dbits),
+ ntohl(saddr->a4) & __bits2mask32(sbits),
+ 0);
+}
+
+static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr,
+ __u8 prefixlen)
+{
+ int pdw;
+ int pbi;
+ u32 initval = 0;
+
+ pdw = prefixlen >> 5; /* num of whole u32 in prefix */
+ pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */
+
+ if (pbi) {
+ __be32 mask;
+
+ mask = htonl((0xffffffff) << (32 - pbi));
+
+ initval = (__force u32)(addr->a6[pdw] & mask);
+ }
+
+ return jhash2((__force u32 *)addr->a6, pdw, initval);
+}
+
+static inline unsigned int __xfrm6_dpref_spref_hash(const xfrm_address_t *daddr,
+ const xfrm_address_t *saddr,
+ __u8 dbits,
+ __u8 sbits)
+{
+ return __xfrm6_pref_hash(daddr, dbits) ^
+ __xfrm6_pref_hash(saddr, sbits);
+}
+
static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
u32 reqid, unsigned short family,
@@ -84,7 +137,8 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
}
static inline unsigned int __sel_hash(const struct xfrm_selector *sel,
- unsigned short family, unsigned int hmask)
+ unsigned short family, unsigned int hmask,
+ u8 dbits, u8 sbits)
{
const xfrm_address_t *daddr = &sel->daddr;
const xfrm_address_t *saddr = &sel->saddr;
@@ -92,19 +146,19 @@ static inline unsigned int __sel_hash(const struct xfrm_selector *sel,
switch (family) {
case AF_INET:
- if (sel->prefixlen_d != 32 ||
- sel->prefixlen_s != 32)
+ if (sel->prefixlen_d < dbits ||
+ sel->prefixlen_s < sbits)
return hmask + 1;
- h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+ h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits);
break;
case AF_INET6:
- if (sel->prefixlen_d != 128 ||
- sel->prefixlen_s != 128)
+ if (sel->prefixlen_d < dbits ||
+ sel->prefixlen_s < sbits)
return hmask + 1;
- h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+ h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits);
break;
}
h ^= (h >> 16);
@@ -113,17 +167,19 @@ static inline unsigned int __sel_hash(const struct xfrm_selector *sel,
static inline unsigned int __addr_hash(const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
- unsigned short family, unsigned int hmask)
+ unsigned short family,
+ unsigned int hmask,
+ u8 dbits, u8 sbits)
{
unsigned int h = 0;
switch (family) {
case AF_INET:
- h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+ h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits);
break;
case AF_INET6:
- h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+ h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits);
break;
}
h ^= (h >> 16);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index c51e8f7b8653..499d6c18a8ce 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -166,11 +166,7 @@ static int xfrm_output_gso(struct sk_buff *skb)
err = xfrm_output2(segs);
if (unlikely(err)) {
- while ((segs = nskb)) {
- nskb = segs->next;
- segs->next = NULL;
- kfree_skb(segs);
- }
+ kfree_skb_list(nskb);
return err;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0525d78ba328..4c4e457e7888 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,6 +39,11 @@
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100
+struct xfrm_flo {
+ struct dst_entry *dst_orig;
+ u8 flags;
+};
+
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
@@ -344,12 +349,39 @@ static inline unsigned int idx_hash(struct net *net, u32 index)
return __idx_hash(index, net->xfrm.policy_idx_hmask);
}
+/* calculate policy hash thresholds */
+static void __get_hash_thresh(struct net *net,
+ unsigned short family, int dir,
+ u8 *dbits, u8 *sbits)
+{
+ switch (family) {
+ case AF_INET:
+ *dbits = net->xfrm.policy_bydst[dir].dbits4;
+ *sbits = net->xfrm.policy_bydst[dir].sbits4;
+ break;
+
+ case AF_INET6:
+ *dbits = net->xfrm.policy_bydst[dir].dbits6;
+ *sbits = net->xfrm.policy_bydst[dir].sbits6;
+ break;
+
+ default:
+ *dbits = 0;
+ *sbits = 0;
+ }
+}
+
static struct hlist_head *policy_hash_bysel(struct net *net,
const struct xfrm_selector *sel,
unsigned short family, int dir)
{
unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
- unsigned int hash = __sel_hash(sel, family, hmask);
+ unsigned int hash;
+ u8 dbits;
+ u8 sbits;
+
+ __get_hash_thresh(net, family, dir, &dbits, &sbits);
+ hash = __sel_hash(sel, family, hmask, dbits, sbits);
return (hash == hmask + 1 ?
&net->xfrm.policy_inexact[dir] :
@@ -362,25 +394,35 @@ static struct hlist_head *policy_hash_direct(struct net *net,
unsigned short family, int dir)
{
unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
- unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
+ unsigned int hash;
+ u8 dbits;
+ u8 sbits;
+
+ __get_hash_thresh(net, family, dir, &dbits, &sbits);
+ hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
return net->xfrm.policy_bydst[dir].table + hash;
}
-static void xfrm_dst_hash_transfer(struct hlist_head *list,
+static void xfrm_dst_hash_transfer(struct net *net,
+ struct hlist_head *list,
struct hlist_head *ndsttable,
- unsigned int nhashmask)
+ unsigned int nhashmask,
+ int dir)
{
struct hlist_node *tmp, *entry0 = NULL;
struct xfrm_policy *pol;
unsigned int h0 = 0;
+ u8 dbits;
+ u8 sbits;
redo:
hlist_for_each_entry_safe(pol, tmp, list, bydst) {
unsigned int h;
+ __get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
- pol->family, nhashmask);
+ pol->family, nhashmask, dbits, sbits);
if (!entry0) {
hlist_del(&pol->bydst);
hlist_add_head(&pol->bydst, ndsttable+h);
@@ -389,7 +431,7 @@ redo:
if (h != h0)
continue;
hlist_del(&pol->bydst);
- hlist_add_after(entry0, &pol->bydst);
+ hlist_add_behind(&pol->bydst, entry0);
}
entry0 = &pol->bydst;
}
@@ -434,7 +476,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
write_lock_bh(&net->xfrm.xfrm_policy_lock);
for (i = hmask; i >= 0; i--)
- xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
+ xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
net->xfrm.policy_bydst[dir].table = ndst;
net->xfrm.policy_bydst[dir].hmask = nhashmask;
@@ -529,6 +571,86 @@ static void xfrm_hash_resize(struct work_struct *work)
mutex_unlock(&hash_resize_mutex);
}
+static void xfrm_hash_rebuild(struct work_struct *work)
+{
+ struct net *net = container_of(work, struct net,
+ xfrm.policy_hthresh.work);
+ unsigned int hmask;
+ struct xfrm_policy *pol;
+ struct xfrm_policy *policy;
+ struct hlist_head *chain;
+ struct hlist_head *odst;
+ struct hlist_node *newpos;
+ int i;
+ int dir;
+ unsigned seq;
+ u8 lbits4, rbits4, lbits6, rbits6;
+
+ mutex_lock(&hash_resize_mutex);
+
+ /* read selector prefixlen thresholds */
+ do {
+ seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
+
+ lbits4 = net->xfrm.policy_hthresh.lbits4;
+ rbits4 = net->xfrm.policy_hthresh.rbits4;
+ lbits6 = net->xfrm.policy_hthresh.lbits6;
+ rbits6 = net->xfrm.policy_hthresh.rbits6;
+ } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
+
+ write_lock_bh(&net->xfrm.xfrm_policy_lock);
+
+ /* reset the bydst and inexact table in all directions */
+ for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+ INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
+ hmask = net->xfrm.policy_bydst[dir].hmask;
+ odst = net->xfrm.policy_bydst[dir].table;
+ for (i = hmask; i >= 0; i--)
+ INIT_HLIST_HEAD(odst + i);
+ if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+ /* dir out => dst = remote, src = local */
+ net->xfrm.policy_bydst[dir].dbits4 = rbits4;
+ net->xfrm.policy_bydst[dir].sbits4 = lbits4;
+ net->xfrm.policy_bydst[dir].dbits6 = rbits6;
+ net->xfrm.policy_bydst[dir].sbits6 = lbits6;
+ } else {
+ /* dir in/fwd => dst = local, src = remote */
+ net->xfrm.policy_bydst[dir].dbits4 = lbits4;
+ net->xfrm.policy_bydst[dir].sbits4 = rbits4;
+ net->xfrm.policy_bydst[dir].dbits6 = lbits6;
+ net->xfrm.policy_bydst[dir].sbits6 = rbits6;
+ }
+ }
+
+ /* re-insert all policies by order of creation */
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ newpos = NULL;
+ chain = policy_hash_bysel(net, &policy->selector,
+ policy->family,
+ xfrm_policy_id2dir(policy->index));
+ hlist_for_each_entry(pol, chain, bydst) {
+ if (policy->priority >= pol->priority)
+ newpos = &pol->bydst;
+ else
+ break;
+ }
+ if (newpos)
+ hlist_add_behind(&policy->bydst, newpos);
+ else
+ hlist_add_head(&policy->bydst, chain);
+ }
+
+ write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+ mutex_unlock(&hash_resize_mutex);
+}
+
+void xfrm_policy_hash_rebuild(struct net *net)
+{
+ schedule_work(&net->xfrm.policy_hthresh.work);
+}
+EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
+
/* Generate new index... KAME seems to generate them ordered by cost
* of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
@@ -654,7 +776,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
break;
}
if (newpos)
- hlist_add_after(newpos, &policy->bydst);
+ hlist_add_behind(&policy->bydst, newpos);
else
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
@@ -1839,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
struct xfrm_policy *pol = xdst->pols[0];
struct xfrm_policy_queue *pq = &pol->polq;
- const struct sk_buff *fclone = skb + 1;
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE)) {
+ if (unlikely(skb_fclone_busy(skb))) {
kfree_skb(skb);
return 0;
}
@@ -1877,13 +1997,14 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
}
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
- struct dst_entry *dst,
+ struct xfrm_flo *xflo,
const struct flowi *fl,
int num_xfrms,
u16 family)
{
int err;
struct net_device *dev;
+ struct dst_entry *dst;
struct dst_entry *dst1;
struct xfrm_dst *xdst;
@@ -1891,9 +2012,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
if (IS_ERR(xdst))
return xdst;
- if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0)
+ if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
+ net->xfrm.sysctl_larval_drop ||
+ num_xfrms <= 0)
return xdst;
+ dst = xflo->dst_orig;
dst1 = &xdst->u.dst;
dst_hold(dst);
xdst->route = dst;
@@ -1935,7 +2059,7 @@ static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
struct flow_cache_object *oldflo, void *ctx)
{
- struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+ struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst, *new_xdst;
int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
@@ -1976,7 +2100,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
goto make_dummy_bundle;
}
- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+ xflo->dst_orig);
if (IS_ERR(new_xdst)) {
err = PTR_ERR(new_xdst);
if (err != -EAGAIN)
@@ -2010,7 +2135,7 @@ make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
* either because the policy blocks, has no transformations or
* we could not build template (no xfrm_states).*/
- xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
+ xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
return ERR_CAST(xdst);
@@ -2104,13 +2229,18 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
}
if (xdst == NULL) {
+ struct xfrm_flo xflo;
+
+ xflo.dst_orig = dst_orig;
+ xflo.flags = flags;
+
/* To accelerate a bit... */
if ((dst_orig->flags & DST_NOXFRM) ||
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
flo = flow_cache_lookup(net, fl, family, dir,
- xfrm_bundle_lookup, dst_orig);
+ xfrm_bundle_lookup, &xflo);
if (flo == NULL)
goto nopol;
if (IS_ERR(flo)) {
@@ -2138,7 +2268,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
xfrm_pols_put(pols, drop_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
- return make_blackhole(net, family, dst_orig);
+ return ERR_PTR(-EREMOTE);
}
err = -EAGAIN;
@@ -2195,6 +2325,23 @@ dropdst:
}
EXPORT_SYMBOL(xfrm_lookup);
+/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
+ * Otherwise we may send out blackholed packets.
+ */
+struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
+ const struct flowi *fl,
+ struct sock *sk, int flags)
+{
+ struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
+ flags | XFRM_LOOKUP_QUEUE);
+
+ if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
+ return make_blackhole(net, dst_orig->ops->family, dst_orig);
+
+ return dst;
+}
+EXPORT_SYMBOL(xfrm_lookup_route);
+
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
@@ -2460,7 +2607,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
skb_dst_force(skb);
- dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
+ dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst)) {
res = 0;
dst = NULL;
@@ -2830,10 +2977,21 @@ static int __net_init xfrm_policy_init(struct net *net)
if (!htab->table)
goto out_bydst;
htab->hmask = hmask;
+ htab->dbits4 = 32;
+ htab->sbits4 = 32;
+ htab->dbits6 = 128;
+ htab->sbits6 = 128;
}
+ net->xfrm.policy_hthresh.lbits4 = 32;
+ net->xfrm.policy_hthresh.rbits4 = 32;
+ net->xfrm.policy_hthresh.lbits6 = 128;
+ net->xfrm.policy_hthresh.rbits6 = 128;
+
+ seqlock_init(&net->xfrm.policy_hthresh.lock);
INIT_LIST_HEAD(&net->xfrm.policy_all);
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
+ INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
if (net_eq(net, &init_net))
register_netdevice_notifier(&xfrm_dev_notifier);
return 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0ab54134bb40..de971b6d38c5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -97,8 +97,6 @@ static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
}
-static DEFINE_MUTEX(hash_resize_mutex);
-
static void xfrm_hash_resize(struct work_struct *work)
{
struct net *net = container_of(work, struct net, xfrm.state_hash_work);
@@ -107,22 +105,20 @@ static void xfrm_hash_resize(struct work_struct *work)
unsigned int nhashmask, ohashmask;
int i;
- mutex_lock(&hash_resize_mutex);
-
nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
ndst = xfrm_hash_alloc(nsize);
if (!ndst)
- goto out_unlock;
+ return;
nsrc = xfrm_hash_alloc(nsize);
if (!nsrc) {
xfrm_hash_free(ndst, nsize);
- goto out_unlock;
+ return;
}
nspi = xfrm_hash_alloc(nsize);
if (!nspi) {
xfrm_hash_free(ndst, nsize);
xfrm_hash_free(nsrc, nsize);
- goto out_unlock;
+ return;
}
spin_lock_bh(&net->xfrm.xfrm_state_lock);
@@ -148,9 +144,6 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(odst, osize);
xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize);
-
-out_unlock:
- mutex_unlock(&hash_resize_mutex);
}
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d4db6ebb089d..e812e988c111 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -333,8 +333,7 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
algo = xfrm_aalg_get_byname(ualg->alg_name, 1);
if (!algo)
return -ENOSYS;
- if ((ualg->alg_trunc_len / 8) > MAX_AH_AUTH_LEN ||
- ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)
+ if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)
return -EINVAL;
*props = algo->desc.sadb_alg_id;
@@ -964,7 +963,9 @@ static inline size_t xfrm_spdinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
+ nla_total_size(sizeof(struct xfrmu_spdinfo))
- + nla_total_size(sizeof(struct xfrmu_spdhinfo));
+ + nla_total_size(sizeof(struct xfrmu_spdhinfo))
+ + nla_total_size(sizeof(struct xfrmu_spdhthresh))
+ + nla_total_size(sizeof(struct xfrmu_spdhthresh));
}
static int build_spdinfo(struct sk_buff *skb, struct net *net,
@@ -973,9 +974,11 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
struct xfrmk_spdinfo si;
struct xfrmu_spdinfo spc;
struct xfrmu_spdhinfo sph;
+ struct xfrmu_spdhthresh spt4, spt6;
struct nlmsghdr *nlh;
int err;
u32 *f;
+ unsigned lseq;
nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
if (nlh == NULL) /* shouldn't really happen ... */
@@ -993,9 +996,22 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
sph.spdhcnt = si.spdhcnt;
sph.spdhmcnt = si.spdhmcnt;
+ do {
+ lseq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
+
+ spt4.lbits = net->xfrm.policy_hthresh.lbits4;
+ spt4.rbits = net->xfrm.policy_hthresh.rbits4;
+ spt6.lbits = net->xfrm.policy_hthresh.lbits6;
+ spt6.rbits = net->xfrm.policy_hthresh.rbits6;
+ } while (read_seqretry(&net->xfrm.policy_hthresh.lock, lseq));
+
err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc);
if (!err)
err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph);
+ if (!err)
+ err = nla_put(skb, XFRMA_SPD_IPV4_HTHRESH, sizeof(spt4), &spt4);
+ if (!err)
+ err = nla_put(skb, XFRMA_SPD_IPV6_HTHRESH, sizeof(spt6), &spt6);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
@@ -1004,6 +1020,51 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
return nlmsg_end(skb, nlh);
}
+static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+{
+ struct net *net = sock_net(skb->sk);
+ struct xfrmu_spdhthresh *thresh4 = NULL;
+ struct xfrmu_spdhthresh *thresh6 = NULL;
+
+ /* selector prefixlen thresholds to hash policies */
+ if (attrs[XFRMA_SPD_IPV4_HTHRESH]) {
+ struct nlattr *rta = attrs[XFRMA_SPD_IPV4_HTHRESH];
+
+ if (nla_len(rta) < sizeof(*thresh4))
+ return -EINVAL;
+ thresh4 = nla_data(rta);
+ if (thresh4->lbits > 32 || thresh4->rbits > 32)
+ return -EINVAL;
+ }
+ if (attrs[XFRMA_SPD_IPV6_HTHRESH]) {
+ struct nlattr *rta = attrs[XFRMA_SPD_IPV6_HTHRESH];
+
+ if (nla_len(rta) < sizeof(*thresh6))
+ return -EINVAL;
+ thresh6 = nla_data(rta);
+ if (thresh6->lbits > 128 || thresh6->rbits > 128)
+ return -EINVAL;
+ }
+
+ if (thresh4 || thresh6) {
+ write_seqlock(&net->xfrm.policy_hthresh.lock);
+ if (thresh4) {
+ net->xfrm.policy_hthresh.lbits4 = thresh4->lbits;
+ net->xfrm.policy_hthresh.rbits4 = thresh4->rbits;
+ }
+ if (thresh6) {
+ net->xfrm.policy_hthresh.lbits6 = thresh6->lbits;
+ net->xfrm.policy_hthresh.rbits6 = thresh6->rbits;
+ }
+ write_sequnlock(&net->xfrm.policy_hthresh.lock);
+
+ xfrm_policy_hash_rebuild(net);
+ }
+
+ return 0;
+}
+
static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr **attrs)
{
@@ -2274,6 +2335,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
[XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
[XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
};
@@ -2308,10 +2370,17 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
};
+static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
+ [XFRMA_SPD_IPV4_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) },
+ [XFRMA_SPD_IPV6_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) },
+};
+
static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
+ const struct nla_policy *nla_pol;
+ int nla_max;
} xfrm_dispatch[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa },
[XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa },
@@ -2335,6 +2404,9 @@ static const struct xfrm_link {
[XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae },
[XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate },
[XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo },
+ [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_set_spdinfo,
+ .nla_pol = xfrma_spd_policy,
+ .nla_max = XFRMA_SPD_MAX },
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
};
@@ -2371,8 +2443,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
- err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX,
- xfrma_policy);
+ err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
+ link->nla_max ? : XFRMA_MAX,
+ link->nla_pol ? : xfrma_policy);
if (err < 0)
return err;