summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c4
-rw-r--r--net/802/mrp.c4
-rw-r--r--net/8021q/Kconfig2
-rw-r--r--net/8021q/vlan.c97
-rw-r--r--net/8021q/vlan.h58
-rw-r--r--net/8021q/vlan_core.c80
-rw-r--r--net/8021q/vlan_dev.c13
-rw-r--r--net/8021q/vlan_gvrp.c4
-rw-r--r--net/8021q/vlan_mvrp.c4
-rw-r--r--net/8021q/vlan_netlink.c32
-rw-r--r--net/8021q/vlanproc.c11
-rw-r--r--net/9p/trans_virtio.c48
-rw-r--r--net/Kconfig2
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/lec.h2
-rw-r--r--net/atm/proc.c2
-rw-r--r--net/batman-adv/Kconfig14
-rw-r--r--net/batman-adv/Makefile3
-rw-r--r--net/batman-adv/bat_iv_ogm.c91
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c6
-rw-r--r--net/batman-adv/debugfs.c18
-rw-r--r--net/batman-adv/distributed-arp-table.c31
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/hard-interface.c66
-rw-r--r--net/batman-adv/hard-interface.h13
-rw-r--r--net/batman-adv/main.c35
-rw-r--r--net/batman-adv/main.h15
-rw-r--r--net/batman-adv/network-coding.c1826
-rw-r--r--net/batman-adv/network-coding.h123
-rw-r--r--net/batman-adv/originator.c26
-rw-r--r--net/batman-adv/originator.h1
-rw-r--r--net/batman-adv/packet.h33
-rw-r--r--net/batman-adv/routing.c61
-rw-r--r--net/batman-adv/send.c5
-rw-r--r--net/batman-adv/soft-interface.c289
-rw-r--r--net/batman-adv/soft-interface.h3
-rw-r--r--net/batman-adv/sysfs.c27
-rw-r--r--net/batman-adv/translation-table.c36
-rw-r--r--net/batman-adv/types.h136
-rw-r--r--net/batman-adv/unicast.c6
-rw-r--r--net/batman-adv/vis.c4
-rw-r--r--net/bluetooth/a2mp.c6
-rw-r--r--net/bluetooth/af_bluetooth.c45
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/bluetooth/bnep/sock.c6
-rw-r--r--net/bluetooth/cmtp/capi.c2
-rw-r--r--net/bluetooth/cmtp/sock.c6
-rw-r--r--net/bluetooth/hci_conn.c42
-rw-r--r--net/bluetooth/hci_core.c921
-rw-r--r--net/bluetooth/hci_event.c781
-rw-r--r--net/bluetooth/hci_sock.c11
-rw-r--r--net/bluetooth/hci_sysfs.c21
-rw-r--r--net/bluetooth/hidp/core.c1026
-rw-r--r--net/bluetooth/hidp/hidp.h69
-rw-r--r--net/bluetooth/hidp/sock.c28
-rw-r--r--net/bluetooth/l2cap_core.c207
-rw-r--r--net/bluetooth/l2cap_sock.c12
-rw-r--r--net/bluetooth/mgmt.c726
-rw-r--r--net/bluetooth/rfcomm/core.c167
-rw-r--r--net/bluetooth/rfcomm/sock.c5
-rw-r--r--net/bluetooth/sco.c54
-rw-r--r--net/bluetooth/smp.c6
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_fdb.c30
-rw-r--r--net/bridge/br_if.c1
-rw-r--r--net/bridge/br_mdb.c4
-rw-r--r--net/bridge/br_multicast.c7
-rw-r--r--net/bridge/br_netfilter.c3
-rw-r--r--net/bridge/br_netlink.c21
-rw-r--r--net/bridge/br_stp.c9
-rw-r--r--net/bridge/br_stp_timer.c2
-rw-r--r--net/bridge/br_vlan.c20
-rw-r--r--net/bridge/netfilter/ebt_log.c53
-rw-r--r--net/bridge/netfilter/ebt_nflog.c5
-rw-r--r--net/bridge/netfilter/ebt_ulog.c148
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/caif/caif_dev.c11
-rw-r--r--net/caif/caif_socket.c24
-rw-r--r--net/caif/caif_usb.c6
-rw-r--r--net/caif/cfcnfg.c21
-rw-r--r--net/caif/cfctrl.c16
-rw-r--r--net/caif/cfdbgl.c2
-rw-r--r--net/caif/cfdgml.c2
-rw-r--r--net/caif/cffrml.c6
-rw-r--r--net/caif/cfmuxl.c6
-rw-r--r--net/caif/cfpkt_skbuff.c10
-rw-r--r--net/caif/cfrfml.c6
-rw-r--r--net/caif/cfserl.c6
-rw-r--r--net/caif/cfsrvl.c15
-rw-r--r--net/caif/cfutill.c2
-rw-r--r--net/caif/cfveil.c2
-rw-r--r--net/caif/cfvidl.c2
-rw-r--r--net/caif/chnl_net.c10
-rw-r--r--net/can/af_can.c30
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/gw.c5
-rw-r--r--net/can/proc.c2
-rw-r--r--net/can/raw.c5
-rw-r--r--net/ceph/Makefile2
-rw-r--r--net/ceph/auth.c117
-rw-r--r--net/ceph/auth_none.c6
-rw-r--r--net/ceph/auth_x.c24
-rw-r--r--net/ceph/auth_x.h1
-rw-r--r--net/ceph/ceph_common.c7
-rw-r--r--net/ceph/debugfs.c4
-rw-r--r--net/ceph/messenger.c1019
-rw-r--r--net/ceph/mon_client.c7
-rw-r--r--net/ceph/osd_client.c1117
-rw-r--r--net/ceph/osdmap.c45
-rw-r--r--net/ceph/snapshot.c78
-rw-r--r--net/compat.c13
-rw-r--r--net/core/datagram.c26
-rw-r--r--net/core/dev.c167
-rw-r--r--net/core/dev_addr_lists.c215
-rw-r--r--net/core/dev_ioctl.c19
-rw-r--r--net/core/dst.c9
-rw-r--r--net/core/ethtool.c31
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c7
-rw-r--r--net/core/flow.c42
-rw-r--r--net/core/flow_dissector.c68
-rw-r--r--net/core/iovec.c50
-rw-r--r--net/core/neighbour.c57
-rw-r--r--net/core/net-procfs.c2
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/net_namespace.c7
-rw-r--r--net/core/netpoll.c23
-rw-r--r--net/core/pktgen.c54
-rw-r--r--net/core/rtnetlink.c178
-rw-r--r--net/core/scm.c20
-rw-r--r--net/core/secure_seq.c4
-rw-r--r--net/core/skbuff.c113
-rw-r--r--net/core/sock.c57
-rw-r--r--net/core/sock_diag.c38
-rw-r--r--net/core/utils.c25
-rw-r--r--net/dcb/dcbevent.c1
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/ipv4.c5
-rw-r--r--net/dccp/ipv6.c5
-rw-r--r--net/decnet/dn_dev.c4
-rw-r--r--net/decnet/dn_fib.c203
-rw-r--r--net/decnet/dn_route.c43
-rw-r--r--net/decnet/dn_table.c45
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c12
-rw-r--r--net/dsa/dsa.c233
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ieee802154/6lowpan.c142
-rw-r--r--net/ieee802154/6lowpan.h7
-rw-r--r--net/ieee802154/dgram.c10
-rw-r--r--net/ieee802154/netlink.c8
-rw-r--r--net/ieee802154/nl-mac.c25
-rw-r--r--net/ipv4/Kconfig7
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c16
-rw-r--r--net/ipv4/arp.c27
-rw-r--r--net/ipv4/devinet.c83
-rw-r--r--net/ipv4/fib_frontend.c10
-rw-r--r--net/ipv4/fib_trie.c13
-rw-r--r--net/ipv4/gre.c17
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c10
-rw-r--r--net/ipv4/inet_fragment.c87
-rw-r--r--net/ipv4/inet_lro.c5
-rw-r--r--net/ipv4/ip_fragment.c31
-rw-r--r--net/ipv4/ip_gre.c1524
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_output.c5
-rw-r--r--net/ipv4/ip_tunnel.c1035
-rw-r--r--net/ipv4/ip_vti.c45
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipconfig.c13
-rw-r--r--net/ipv4/ipip.c748
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/netfilter.c15
-rw-r--r--net/ipv4/netfilter/Kconfig2
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/arptable_filter.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c10
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c10
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c157
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c9
-rw-r--r--net/ipv4/netfilter/iptable_nat.c23
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c9
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c2
-rw-r--r--net/ipv4/ping.c5
-rw-r--r--net/ipv4/proc.c13
-rw-r--r--net/ipv4/route.c9
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c18
-rw-r--r--net/ipv4/tcp.c314
-rw-r--r--net/ipv4/tcp_input.c630
-rw-r--r--net/ipv4/tcp_ipv4.c129
-rw-r--r--net/ipv4/tcp_memcontrol.c3
-rw-r--r--net/ipv4/tcp_metrics.c15
-rw-r--r--net/ipv4/tcp_minisocks.c51
-rw-r--r--net/ipv4/tcp_output.c387
-rw-r--r--net/ipv4/tcp_timer.c21
-rw-r--r--net/ipv4/tcp_westwood.c2
-rw-r--r--net/ipv4/udp.c137
-rw-r--r--net/ipv4/udp_diag.c10
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c8
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c216
-rw-r--r--net/ipv6/addrlabel.c12
-rw-r--r--net/ipv6/af_inet6.c7
-rw-r--r--net/ipv6/datagram.c20
-rw-r--r--net/ipv6/icmp.c41
-rw-r--r--net/ipv6/inet6_connection_sock.c10
-rw-r--r--net/ipv6/ip6_flowlabel.c11
-rw-r--r--net/ipv6/ip6_gre.c64
-rw-r--r--net/ipv6/ip6_icmp.c47
-rw-r--r--net/ipv6/ip6_offload.c4
-rw-r--r--net/ipv6/ip6_output.c22
-rw-r--r--net/ipv6/ip6_tunnel.c16
-rw-r--r--net/ipv6/ip6mr.c10
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter.c19
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c4
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c11
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c9
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c23
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c10
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c22
-rw-r--r--net/ipv6/proc.c9
-rw-r--r--net/ipv6/raw.c9
-rw-r--r--net/ipv6/reassembly.c23
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/sit.c41
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c87
-rw-r--r--net/ipv6/udp.c47
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udp_offload.c28
-rw-r--r--net/ipv6/udplite.c2
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c7
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/irda/af_irda.c5
-rw-r--r--net/irda/ircomm/ircomm_core.c2
-rw-r--r--net/irda/ircomm/ircomm_tty.c2
-rw-r--r--net/irda/ircomm/ircomm_tty_attach.c6
-rw-r--r--net/irda/irlap_frame.c2
-rw-r--r--net/irda/irlmp.c2
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/key/af_key.c6
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/l2tp/l2tp_ppp.c6
-rw-r--r--net/mac80211/cfg.c241
-rw-r--r--net/mac80211/chan.c54
-rw-r--r--net/mac80211/debugfs_key.c10
-rw-r--r--net/mac80211/debugfs_netdev.c33
-rw-r--r--net/mac80211/debugfs_sta.c33
-rw-r--r--net/mac80211/driver-ops.h67
-rw-r--r--net/mac80211/ht.c52
-rw-r--r--net/mac80211/ibss.c175
-rw-r--r--net/mac80211/ieee80211_i.h88
-rw-r--r--net/mac80211/iface.c190
-rw-r--r--net/mac80211/key.c208
-rw-r--r--net/mac80211/key.h18
-rw-r--r--net/mac80211/main.c161
-rw-r--r--net/mac80211/mesh.c70
-rw-r--r--net/mac80211/mesh.h15
-rw-r--r--net/mac80211/mesh_hwmp.c28
-rw-r--r--net/mac80211/mesh_pathtbl.c56
-rw-r--r--net/mac80211/mesh_plink.c43
-rw-r--r--net/mac80211/mlme.c652
-rw-r--r--net/mac80211/offchannel.c16
-rw-r--r--net/mac80211/pm.c132
-rw-r--r--net/mac80211/rate.c327
-rw-r--r--net/mac80211/rc80211_minstrel.c342
-rw-r--r--net/mac80211/rc80211_minstrel.h34
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c16
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c273
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h8
-rw-r--r--net/mac80211/rx.c88
-rw-r--r--net/mac80211/scan.c20
-rw-r--r--net/mac80211/sta_info.c56
-rw-r--r--net/mac80211/sta_info.h11
-rw-r--r--net/mac80211/tkip.c4
-rw-r--r--net/mac80211/trace.h97
-rw-r--r--net/mac80211/tx.c163
-rw-r--r--net/mac80211/util.c225
-rw-r--r--net/mac80211/vht.c212
-rw-r--r--net/mac802154/mac802154.h5
-rw-r--r--net/mac802154/mac_cmd.c1
-rw-r--r--net/mac802154/mib.c21
-rw-r--r--net/mac802154/tx.c29
-rw-r--r--net/mac802154/wpan.c4
-rw-r--r--net/netfilter/core.c32
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h277
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c411
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c624
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c414
-rw-r--r--net/netfilter/ipset/ip_set_core.c41
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h1100
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c344
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c362
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c368
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c469
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c402
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c478
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c456
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c622
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c31
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c312
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c126
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c703
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c115
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c190
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c55
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c38
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c40
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c33
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c64
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c63
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c56
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c176
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c1050
-rw-r--r--net/netfilter/nf_conntrack_amanda.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c60
-rw-r--r--net/netfilter/nf_conntrack_ecache.c8
-rw-r--r--net/netfilter/nf_conntrack_expect.c9
-rw-r--r--net/netfilter/nf_conntrack_ftp.c1
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c1
-rw-r--r--net/netfilter/nf_conntrack_helper.c3
-rw-r--r--net/netfilter/nf_conntrack_irc.c1
-rw-r--r--net/netfilter/nf_conntrack_labels.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c101
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c8
-rw-r--r--net/netfilter/nf_conntrack_standalone.c17
-rw-r--r--net/netfilter/nf_conntrack_tftp.c2
-rw-r--r--net/netfilter/nf_log.c211
-rw-r--r--net/netfilter/nf_nat_amanda.c1
-rw-r--r--net/netfilter/nf_nat_core.c10
-rw-r--r--net/netfilter/nf_nat_helper.c1
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c5
-rw-r--r--net/netfilter/nf_nat_sip.c3
-rw-r--r--net/netfilter/nf_queue.c148
-rw-r--r--net/netfilter/nfnetlink.c27
-rw-r--r--net/netfilter/nfnetlink_acct.c7
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c7
-rw-r--r--net/netfilter/nfnetlink_log.c203
-rw-r--r--net/netfilter/nfnetlink_queue_core.c441
-rw-r--r--net/netfilter/x_tables.c7
-rw-r--r--net/netfilter/xt_LOG.c63
-rw-r--r--net/netfilter/xt_NFLOG.c3
-rw-r--r--net/netfilter/xt_NFQUEUE.c63
-rw-r--r--net/netfilter/xt_TCPMSS.c24
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c19
-rw-r--r--net/netfilter/xt_addrtype.c27
-rw-r--r--net/netfilter/xt_conntrack.c1
-rw-r--r--net/netfilter/xt_hashlimit.c17
-rw-r--r--net/netfilter/xt_limit.c1
-rw-r--r--net/netfilter/xt_osf.c6
-rw-r--r--net/netfilter/xt_recent.c9
-rw-r--r--net/netfilter/xt_set.c94
-rw-r--r--net/netlabel/netlabel_domainhash.c69
-rw-r--r--net/netlink/Kconfig19
-rw-r--r--net/netlink/Makefile3
-rw-r--r--net/netlink/af_netlink.c900
-rw-r--r--net/netlink/af_netlink.h82
-rw-r--r--net/netlink/diag.c227
-rw-r--r--net/netlink/genetlink.c119
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/Kconfig2
-rw-r--r--net/nfc/Makefile5
-rw-r--r--net/nfc/core.c43
-rw-r--r--net/nfc/llcp.h (renamed from net/nfc/llcp/llcp.h)39
-rw-r--r--net/nfc/llcp/Kconfig7
-rw-r--r--net/nfc/llcp_commands.c (renamed from net/nfc/llcp/commands.c)229
-rw-r--r--net/nfc/llcp_core.c (renamed from net/nfc/llcp/llcp.c)241
-rw-r--r--net/nfc/llcp_sock.c (renamed from net/nfc/llcp/sock.c)171
-rw-r--r--net/nfc/netlink.c175
-rw-r--r--net/nfc/nfc.h60
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/datapath.c421
-rw-r--r--net/openvswitch/datapath.h72
-rw-r--r--net/openvswitch/dp_notify.c82
-rw-r--r--net/openvswitch/flow.c8
-rw-r--r--net/openvswitch/flow.h21
-rw-r--r--net/openvswitch/vport-internal_dev.c22
-rw-r--r--net/openvswitch/vport-netdev.c15
-rw-r--r--net/openvswitch/vport-netdev.h1
-rw-r--r--net/openvswitch/vport.c58
-rw-r--r--net/openvswitch/vport.h21
-rw-r--r--net/packet/af_packet.c360
-rw-r--r--net/packet/diag.c27
-rw-r--r--net/packet/internal.h10
-rw-r--r--net/phonet/pn_netlink.c4
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/rfkill/rfkill-gpio.c7
-rw-r--r--net/rfkill/rfkill-regulator.c2
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_csum.c39
-rw-r--r--net/sched/act_ipt.c33
-rw-r--r--net/sched/act_police.c8
-rw-r--r--net/sched/cls_api.c14
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/em_ipset.c2
-rw-r--r--net/sched/sch_api.c55
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_generic.c8
-rw-r--r--net/sched/sch_htb.c73
-rw-r--r--net/sched/sch_tbf.c8
-rw-r--r--net/sctp/associola.c28
-rw-r--r--net/sctp/bind_addr.c7
-rw-r--r--net/sctp/endpointola.c14
-rw-r--r--net/sctp/inqueue.c7
-rw-r--r--net/sctp/output.c5
-rw-r--r--net/sctp/outqueue.c17
-rw-r--r--net/sctp/probe.c2
-rw-r--r--net/sctp/proc.c12
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c14
-rw-r--r--net/sctp/ssnmap.c23
-rw-r--r--net/sctp/transport.c1
-rw-r--r--net/sctp/ulpqueue.c3
-rw-r--r--net/socket.c99
-rw-r--r--net/sunrpc/Kconfig2
-rw-r--r--net/sunrpc/auth.c75
-rw-r--r--net/sunrpc/auth_gss/Makefile3
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c70
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c13
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c4
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c124
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c358
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.h48
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c840
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.h264
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c369
-rw-r--r--net/sunrpc/cache.c29
-rw-r--r--net/sunrpc/clnt.c117
-rw-r--r--net/sunrpc/netns.h10
-rw-r--r--net/sunrpc/rpc_pipe.c13
-rw-r--r--net/sunrpc/sched.c29
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svcauth_unix.c12
-rw-r--r--net/sunrpc/xprt.c63
-rw-r--r--net/sunrpc/xprtsock.c17
-rw-r--r--net/tipc/Kconfig7
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/bcast.c40
-rw-r--r--net/tipc/bearer.c7
-rw-r--r--net/tipc/bearer.h16
-rw-r--r--net/tipc/core.c12
-rw-r--r--net/tipc/discover.c2
-rw-r--r--net/tipc/eth_media.c39
-rw-r--r--net/tipc/ib_media.c387
-rw-r--r--net/tipc/link.c11
-rw-r--r--net/tipc/netlink.c6
-rw-r--r--net/unix/af_unix.c48
-rw-r--r--net/unix/garbage.c12
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/vmw_vsock/vmci_transport.c16
-rw-r--r--net/vmw_vsock/vmci_transport.h3
-rw-r--r--net/wireless/ap.c62
-rw-r--r--net/wireless/core.c92
-rw-r--r--net/wireless/core.h25
-rw-r--r--net/wireless/lib80211_crypt_ccmp.c29
-rw-r--r--net/wireless/lib80211_crypt_tkip.c44
-rw-r--r--net/wireless/lib80211_crypt_wep.c5
-rw-r--r--net/wireless/mesh.c15
-rw-r--r--net/wireless/mlme.c235
-rw-r--r--net/wireless/nl80211.c2155
-rw-r--r--net/wireless/nl80211.h68
-rw-r--r--net/wireless/rdev-ops.h44
-rw-r--r--net/wireless/reg.c8
-rw-r--r--net/wireless/sme.c34
-rw-r--r--net/wireless/sysfs.c25
-rw-r--r--net/wireless/trace.h104
-rw-r--r--net/wireless/util.c30
-rw-r--r--net/x25/x25_proc.c47
-rw-r--r--net/xfrm/xfrm_algo.c13
-rw-r--r--net/xfrm/xfrm_output.c1
-rw-r--r--net/xfrm/xfrm_policy.c26
-rw-r--r--net/xfrm/xfrm_state.c1
-rw-r--r--net/xfrm/xfrm_user.c19
504 files changed, 27841 insertions, 16822 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 8456f5d98b85..5d9630a0eb93 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
/* Delete timer and generate a final TRANSMIT_PDU event to flush out
* all pending messages before the applicant is gone. */
del_timer_sync(&app->join_timer);
+
+ spin_lock_bh(&app->lock);
garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
garp_pdu_queue(app);
+ spin_unlock_bh(&app->lock);
+
garp_queue_xmit(app);
dev_mc_del(dev, appl->proto.group_address);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index e085bcc754f6..1eb05d80b07b 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -871,10 +871,10 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
*/
del_timer_sync(&app->join_timer);
- spin_lock(&app->lock);
+ spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
mrp_pdu_queue(app);
- spin_unlock(&app->lock);
+ spin_unlock_bh(&app->lock);
mrp_queue_xmit(app);
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 8f7517df41a5..b85a91fa61f1 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -3,7 +3,7 @@
#
config VLAN_8021Q
- tristate "802.1Q VLAN Support"
+ tristate "802.1Q/802.1ad VLAN Support"
---help---
Select this and you will be able to create 802.1Q VLAN interfaces
on your ethernet interfaces. 802.1Q VLAN supports almost
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 85addcd9372b..9424f3718ea7 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,14 +51,18 @@ const char vlan_version[] = DRV_VERSION;
/* End of global variables definitions. */
-static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
+static int vlan_group_prealloc_vid(struct vlan_group *vg,
+ __be16 vlan_proto, u16 vlan_id)
{
struct net_device **array;
+ unsigned int pidx, vidx;
unsigned int size;
ASSERT_RTNL();
- array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+ pidx = vlan_proto_idx(vlan_proto);
+ vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN;
+ array = vg->vlan_devices_arrays[pidx][vidx];
if (array != NULL)
return 0;
@@ -67,7 +71,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
if (array == NULL)
return -ENOBUFS;
- vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
+ vg->vlan_devices_arrays[pidx][vidx] = array;
return 0;
}
@@ -93,7 +97,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
if (vlan->flags & VLAN_FLAG_GVRP)
vlan_gvrp_request_leave(dev);
- vlan_group_set_device(grp, vlan_id, NULL);
+ vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
/* Because unregister_netdevice_queue() makes sure at least one rcu
* grace period is respected before device freeing,
* we dont need to call synchronize_net() here.
@@ -112,13 +116,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
* VLAN is not 0 (leave it there for 802.1p).
*/
if (vlan_id)
- vlan_vid_del(real_dev, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
}
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev,
+ __be16 protocol, u16 vlan_id)
{
const char *name = real_dev->name;
@@ -127,7 +132,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
return -EOPNOTSUPP;
}
- if (vlan_find_dev(real_dev, vlan_id) != NULL)
+ if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
return -EEXIST;
return 0;
@@ -142,7 +147,7 @@ int register_vlan_dev(struct net_device *dev)
struct vlan_group *grp;
int err;
- err = vlan_vid_add(real_dev, vlan_id);
+ err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id);
if (err)
return err;
@@ -160,7 +165,7 @@ int register_vlan_dev(struct net_device *dev)
goto out_uninit_gvrp;
}
- err = vlan_group_prealloc_vid(grp, vlan_id);
+ err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id);
if (err < 0)
goto out_uninit_mvrp;
@@ -181,7 +186,7 @@ int register_vlan_dev(struct net_device *dev)
/* So, got the sucker initialized, now lets place
* it into our local structure.
*/
- vlan_group_set_device(grp, vlan_id, dev);
+ vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev);
grp->nr_vlan_devs++;
return 0;
@@ -195,7 +200,7 @@ out_uninit_gvrp:
if (grp->nr_vlan_devs == 0)
vlan_gvrp_uninit_applicant(real_dev);
out_vid_del:
- vlan_vid_del(real_dev, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
return err;
}
@@ -213,7 +218,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
if (vlan_id >= VLAN_VID_MASK)
return -ERANGE;
- err = vlan_check_real_dev(real_dev, vlan_id);
+ err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id);
if (err < 0)
return err;
@@ -255,6 +260,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
new_dev->mtu = real_dev->mtu;
new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
+ vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q);
vlan_dev_priv(new_dev)->vlan_id = vlan_id;
vlan_dev_priv(new_dev)->real_dev = real_dev;
vlan_dev_priv(new_dev)->dent = NULL;
@@ -301,7 +307,7 @@ static void vlan_transfer_features(struct net_device *dev,
{
vlandev->gso_max_size = dev->gso_max_size;
- if (dev->features & NETIF_F_HW_VLAN_TX)
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
vlandev->hard_header_len = dev->hard_header_len;
else
vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
@@ -341,16 +347,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
int i, flgs;
struct net_device *vlandev;
struct vlan_dev_priv *vlan;
+ bool last = false;
LIST_HEAD(list);
if (is_vlan_dev(dev))
__vlan_device_event(dev, event);
if ((event == NETDEV_UP) &&
- (dev->features & NETIF_F_HW_VLAN_FILTER)) {
+ (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
pr_info("adding VLAN 0 to HW filter on device %s\n",
dev->name);
- vlan_vid_add(dev, 0);
+ vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
vlan_info = rtnl_dereference(dev->vlan_info);
@@ -365,22 +372,13 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
switch (event) {
case NETDEV_CHANGE:
/* Propagate real device state to vlan devices */
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev)
netif_stacked_transfer_operstate(dev, vlandev);
- }
break;
case NETDEV_CHANGEADDR:
/* Adjust unicast filters on underlying device */
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
if (!(flgs & IFF_UP))
continue;
@@ -390,11 +388,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
break;
case NETDEV_CHANGEMTU:
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev) {
if (vlandev->mtu <= dev->mtu)
continue;
@@ -404,26 +398,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_FEAT_CHANGE:
/* Propagate device features to underlying device */
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev)
vlan_transfer_features(dev, vlandev);
- }
-
break;
case NETDEV_DOWN:
- if (dev->features & NETIF_F_HW_VLAN_FILTER)
- vlan_vid_del(dev, 0);
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
/* Put all VLANs for this dev in the down state too. */
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
if (!(flgs & IFF_UP))
continue;
@@ -437,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_UP:
/* Put all VLANs for this dev in the up state too. */
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
if (flgs & IFF_UP)
continue;
@@ -458,17 +438,15 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
if (dev->reg_state != NETREG_UNREGISTERING)
break;
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev) {
/* removal of last vid destroys vlan_info, abort
* afterwards */
if (vlan_info->nr_vids == 1)
- i = VLAN_N_VID;
+ last = true;
unregister_vlan_dev(vlandev, &list);
+ if (last)
+ break;
}
unregister_netdevice_many(&list);
break;
@@ -482,13 +460,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_NOTIFY_PEERS:
case NETDEV_BONDING_FAILOVER:
/* Propagate to vlan devices */
- for (i = 0; i < VLAN_N_VID; i++) {
- vlandev = vlan_group_get_device(grp, i);
- if (!vlandev)
- continue;
-
+ vlan_group_for_each_dev(grp, i, vlandev)
call_netdevice_notifiers(event, vlandev);
- }
break;
}
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 670f1e8cfc0f..ba5983f34c42 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -49,6 +49,7 @@ struct netpoll;
* @ingress_priority_map: ingress priority mappings
* @nr_egress_mappings: number of egress priority mappings
* @egress_priority_map: hash of egress priority mappings
+ * @vlan_proto: VLAN encapsulation protocol
* @vlan_id: VLAN identifier
* @flags: device flags
* @real_dev: underlying netdevice
@@ -62,6 +63,7 @@ struct vlan_dev_priv {
unsigned int nr_egress_mappings;
struct vlan_priority_tci_mapping *egress_priority_map[16];
+ __be16 vlan_proto;
u16 vlan_id;
u16 flags;
@@ -87,10 +89,17 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
#define VLAN_GROUP_ARRAY_SPLIT_PARTS 8
#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
+enum vlan_protos {
+ VLAN_PROTO_8021Q = 0,
+ VLAN_PROTO_8021AD,
+ VLAN_PROTO_NUM,
+};
+
struct vlan_group {
unsigned int nr_vlan_devs;
struct hlist_node hlist; /* linked list */
- struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+ struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM]
+ [VLAN_GROUP_ARRAY_SPLIT_PARTS];
};
struct vlan_info {
@@ -103,37 +112,67 @@ struct vlan_info {
struct rcu_head rcu;
};
-static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
- u16 vlan_id)
+static inline unsigned int vlan_proto_idx(__be16 proto)
+{
+ switch (proto) {
+ case __constant_htons(ETH_P_8021Q):
+ return VLAN_PROTO_8021Q;
+ case __constant_htons(ETH_P_8021AD):
+ return VLAN_PROTO_8021AD;
+ default:
+ BUG();
+ return 0;
+ }
+}
+
+static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
+ unsigned int pidx,
+ u16 vlan_id)
{
struct net_device **array;
- array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+ array = vg->vlan_devices_arrays[pidx]
+ [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
}
+static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
+ __be16 vlan_proto,
+ u16 vlan_id)
+{
+ return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id);
+}
+
static inline void vlan_group_set_device(struct vlan_group *vg,
- u16 vlan_id,
+ __be16 vlan_proto, u16 vlan_id,
struct net_device *dev)
{
struct net_device **array;
if (!vg)
return;
- array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+ array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)]
+ [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
}
/* Must be invoked with rcu_read_lock or with RTNL. */
static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
- u16 vlan_id)
+ __be16 vlan_proto, u16 vlan_id)
{
struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
if (vlan_info)
- return vlan_group_get_device(&vlan_info->grp, vlan_id);
+ return vlan_group_get_device(&vlan_info->grp,
+ vlan_proto, vlan_id);
return NULL;
}
+#define vlan_group_for_each_dev(grp, i, dev) \
+ for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \
+ if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
+ (i) % VLAN_N_VID)))
+
/* found in vlan_dev.c */
void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
@@ -142,7 +181,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev,
+ __be16 protocol, u16 vlan_id);
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f3b6f515eba6..8a15eaadc4bd 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,11 +8,12 @@
bool vlan_do_receive(struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
+ __be16 vlan_proto = skb->vlan_proto;
u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
struct net_device *vlan_dev;
struct vlan_pcpu_stats *rx_stats;
- vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+ vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id);
if (!vlan_dev)
return false;
@@ -38,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
* original position later
*/
skb_push(skb, offset);
- skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+ skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
+ skb->vlan_tci);
if (!skb)
return false;
skb_pull(skb, offset + VLAN_HLEN);
@@ -62,12 +64,13 @@ bool vlan_do_receive(struct sk_buff **skbp)
/* Must be invoked with rcu_read_lock. */
struct net_device *__vlan_find_dev_deep(struct net_device *dev,
- u16 vlan_id)
+ __be16 vlan_proto, u16 vlan_id)
{
struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
if (vlan_info) {
- return vlan_group_get_device(&vlan_info->grp, vlan_id);
+ return vlan_group_get_device(&vlan_info->grp,
+ vlan_proto, vlan_id);
} else {
/*
* Lower devices of master uppers (bonding, team) do not have
@@ -78,7 +81,8 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev,
upper_dev = netdev_master_upper_dev_get_rcu(dev);
if (upper_dev)
- return __vlan_find_dev_deep(upper_dev, vlan_id);
+ return __vlan_find_dev_deep(upper_dev,
+ vlan_proto, vlan_id);
}
return NULL;
@@ -125,7 +129,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
vhdr = (struct vlan_hdr *) skb->data;
vlan_tci = ntohs(vhdr->h_vlan_TCI);
- __vlan_hwaccel_put_tag(skb, vlan_tci);
+ __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
skb_pull_rcsum(skb, VLAN_HLEN);
vlan_set_encap_proto(skb, vhdr);
@@ -153,10 +157,11 @@ EXPORT_SYMBOL(vlan_untag);
static void vlan_group_free(struct vlan_group *grp)
{
- int i;
+ int i, j;
- for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
- kfree(grp->vlan_devices_arrays[i]);
+ for (i = 0; i < VLAN_PROTO_NUM; i++)
+ for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++)
+ kfree(grp->vlan_devices_arrays[i][j]);
}
static void vlan_info_free(struct vlan_info *vlan_info)
@@ -185,35 +190,49 @@ static struct vlan_info *vlan_info_alloc(struct net_device *dev)
struct vlan_vid_info {
struct list_head list;
- unsigned short vid;
+ __be16 proto;
+ u16 vid;
int refcount;
};
+static bool vlan_hw_filter_capable(const struct net_device *dev,
+ const struct vlan_vid_info *vid_info)
+{
+ if (vid_info->proto == htons(ETH_P_8021Q) &&
+ dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ return true;
+ if (vid_info->proto == htons(ETH_P_8021AD) &&
+ dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
+ return true;
+ return false;
+}
+
static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
- unsigned short vid)
+ __be16 proto, u16 vid)
{
struct vlan_vid_info *vid_info;
list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
- if (vid_info->vid == vid)
+ if (vid_info->proto == proto && vid_info->vid == vid)
return vid_info;
}
return NULL;
}
-static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid)
+static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
{
struct vlan_vid_info *vid_info;
vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL);
if (!vid_info)
return NULL;
+ vid_info->proto = proto;
vid_info->vid = vid;
return vid_info;
}
-static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
+static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
struct vlan_vid_info **pvid_info)
{
struct net_device *dev = vlan_info->real_dev;
@@ -221,12 +240,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
struct vlan_vid_info *vid_info;
int err;
- vid_info = vlan_vid_info_alloc(vid);
+ vid_info = vlan_vid_info_alloc(proto, vid);
if (!vid_info)
return -ENOMEM;
- if (dev->features & NETIF_F_HW_VLAN_FILTER) {
- err = ops->ndo_vlan_rx_add_vid(dev, vid);
+ if (vlan_hw_filter_capable(dev, vid_info)) {
+ err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
if (err) {
kfree(vid_info);
return err;
@@ -238,7 +257,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
return 0;
}
-int vlan_vid_add(struct net_device *dev, unsigned short vid)
+int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
{
struct vlan_info *vlan_info;
struct vlan_vid_info *vid_info;
@@ -254,9 +273,9 @@ int vlan_vid_add(struct net_device *dev, unsigned short vid)
return -ENOMEM;
vlan_info_created = true;
}
- vid_info = vlan_vid_info_get(vlan_info, vid);
+ vid_info = vlan_vid_info_get(vlan_info, proto, vid);
if (!vid_info) {
- err = __vlan_vid_add(vlan_info, vid, &vid_info);
+ err = __vlan_vid_add(vlan_info, proto, vid, &vid_info);
if (err)
goto out_free_vlan_info;
}
@@ -279,14 +298,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
{
struct net_device *dev = vlan_info->real_dev;
const struct net_device_ops *ops = dev->netdev_ops;
- unsigned short vid = vid_info->vid;
+ __be16 proto = vid_info->proto;
+ u16 vid = vid_info->vid;
int err;
- if (dev->features & NETIF_F_HW_VLAN_FILTER) {
- err = ops->ndo_vlan_rx_kill_vid(dev, vid);
+ if (vlan_hw_filter_capable(dev, vid_info)) {
+ err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
if (err) {
- pr_warn("failed to kill vid %d for device %s\n",
- vid, dev->name);
+ pr_warn("failed to kill vid %04x/%d for device %s\n",
+ proto, vid, dev->name);
}
}
list_del(&vid_info->list);
@@ -294,7 +314,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
vlan_info->nr_vids--;
}
-void vlan_vid_del(struct net_device *dev, unsigned short vid)
+void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid)
{
struct vlan_info *vlan_info;
struct vlan_vid_info *vid_info;
@@ -305,7 +325,7 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid)
if (!vlan_info)
return;
- vid_info = vlan_vid_info_get(vlan_info, vid);
+ vid_info = vlan_vid_info_get(vlan_info, proto, vid);
if (!vid_info)
return;
vid_info->refcount--;
@@ -333,7 +353,7 @@ int vlan_vids_add_by_dev(struct net_device *dev,
return 0;
list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
- err = vlan_vid_add(dev, vid_info->vid);
+ err = vlan_vid_add(dev, vid_info->proto, vid_info->vid);
if (err)
goto unwind;
}
@@ -343,7 +363,7 @@ unwind:
list_for_each_entry_continue_reverse(vid_info,
&vlan_info->vid_list,
list) {
- vlan_vid_del(dev, vid_info->vid);
+ vlan_vid_del(dev, vid_info->proto, vid_info->vid);
}
return err;
@@ -363,7 +383,7 @@ void vlan_vids_del_by_dev(struct net_device *dev,
return;
list_for_each_entry(vid_info, &vlan_info->vid_list, list)
- vlan_vid_del(dev, vid_info->vid);
+ vlan_vid_del(dev, vid_info->proto, vid_info->vid);
}
EXPORT_SYMBOL(vlan_vids_del_by_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 19cf81bf9f69..3a8c8fd63c88 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -99,6 +99,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
const void *daddr, const void *saddr,
unsigned int len)
{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct vlan_hdr *vhdr;
unsigned int vhdrlen = 0;
u16 vlan_tci = 0;
@@ -120,8 +121,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
else
vhdr->h_vlan_encapsulated_proto = htons(len);
- skb->protocol = htons(ETH_P_8021Q);
- type = ETH_P_8021Q;
+ skb->protocol = vlan->vlan_proto;
+ type = ntohs(vlan->vlan_proto);
vhdrlen = VLAN_HLEN;
}
@@ -161,12 +162,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
* NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
- if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
+ if (veth->h_vlan_proto != vlan->vlan_proto ||
vlan->flags & VLAN_FLAG_REORDER_HDR) {
u16 vlan_tci;
vlan_tci = vlan->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
- skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+ skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
}
skb->dev = vlan->real_dev;
@@ -583,7 +584,7 @@ static int vlan_dev_init(struct net_device *dev)
#endif
dev->needed_headroom = real_dev->needed_headroom;
- if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+ if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
dev->header_ops = real_dev->header_ops;
dev->hard_header_len = real_dev->hard_header_len;
} else {
@@ -627,7 +628,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
netdev_features_t features)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- u32 old_features = features;
+ netdev_features_t old_features = features;
features &= real_dev->vlan_features;
features |= NETIF_F_RXCSUM;
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 6f9755352760..66a80320b032 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -32,6 +32,8 @@ int vlan_gvrp_request_join(const struct net_device *dev)
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
__be16 vlan_id = htons(vlan->vlan_id);
+ if (vlan->vlan_proto != htons(ETH_P_8021Q))
+ return 0;
return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
&vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
}
@@ -41,6 +43,8 @@ void vlan_gvrp_request_leave(const struct net_device *dev)
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
__be16 vlan_id = htons(vlan->vlan_id);
+ if (vlan->vlan_proto != htons(ETH_P_8021Q))
+ return;
garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
&vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
}
diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c
index d9ec1d5964aa..e0fe091801b0 100644
--- a/net/8021q/vlan_mvrp.c
+++ b/net/8021q/vlan_mvrp.c
@@ -38,6 +38,8 @@ int vlan_mvrp_request_join(const struct net_device *dev)
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
__be16 vlan_id = htons(vlan->vlan_id);
+ if (vlan->vlan_proto != htons(ETH_P_8021Q))
+ return 0;
return mrp_request_join(vlan->real_dev, &vlan_mrp_app,
&vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
}
@@ -47,6 +49,8 @@ void vlan_mvrp_request_leave(const struct net_device *dev)
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
__be16 vlan_id = htons(vlan->vlan_id);
+ if (vlan->vlan_proto != htons(ETH_P_8021Q))
+ return;
mrp_request_leave(vlan->real_dev, &vlan_mrp_app,
&vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
}
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 1789658b7cd7..309129732285 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
[IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) },
[IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED },
[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
+ [IFLA_VLAN_PROTOCOL] = { .type = NLA_U16 },
};
static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
@@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
if (!data)
return -EINVAL;
+ if (data[IFLA_VLAN_PROTOCOL]) {
+ switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
+ case __constant_htons(ETH_P_8021Q):
+ case __constant_htons(ETH_P_8021AD):
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+ }
+
if (data[IFLA_VLAN_ID]) {
id = nla_get_u16(data[IFLA_VLAN_ID]);
if (id >= VLAN_VID_MASK)
@@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev;
+ __be16 proto;
int err;
if (!data[IFLA_VLAN_ID])
@@ -118,11 +130,17 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
if (!real_dev)
return -ENODEV;
- vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
- vlan->real_dev = real_dev;
- vlan->flags = VLAN_FLAG_REORDER_HDR;
+ if (data[IFLA_VLAN_PROTOCOL])
+ proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
+ else
+ proto = htons(ETH_P_8021Q);
+
+ vlan->vlan_proto = proto;
+ vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ vlan->real_dev = real_dev;
+ vlan->flags = VLAN_FLAG_REORDER_HDR;
- err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+ err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
if (err < 0)
return err;
@@ -151,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
- return nla_total_size(2) + /* IFLA_VLAN_ID */
+ return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */
+ nla_total_size(2) + /* IFLA_VLAN_ID */
sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
vlan_qos_map_size(vlan->nr_ingress_mappings) +
vlan_qos_map_size(vlan->nr_egress_mappings);
@@ -166,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct nlattr *nest;
unsigned int i;
- if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id))
+ if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) ||
+ nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id))
goto nla_put_failure;
if (vlan->flags) {
f.flags = vlan->flags;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index dc526ec965e4..1d0e89213a28 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -93,7 +93,7 @@ static const struct file_operations vlan_fops = {
static int vlandev_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, vlandev_seq_show, PDE(inode)->data);
+ return single_open(file, vlandev_seq_show, PDE_DATA(inode));
}
static const struct file_operations vlandev_fops = {
@@ -184,14 +184,9 @@ int vlan_proc_add_dev(struct net_device *vlandev)
*/
int vlan_proc_rem_dev(struct net_device *vlandev)
{
- struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
-
/** NOTE: This will consume the memory pointed to by dent, it seems. */
- if (vlan_dev_priv(vlandev)->dent) {
- remove_proc_entry(vlan_dev_priv(vlandev)->dent->name,
- vn->proc_vlan_dir);
- vlan_dev_priv(vlandev)->dent = NULL;
- }
+ proc_remove(vlan_dev_priv(vlandev)->dent);
+ vlan_dev_priv(vlandev)->dent = NULL;
return 0;
}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index de2e950a0a7a..e1c26b101830 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -194,11 +194,14 @@ static int pack_sg_list(struct scatterlist *sg, int start,
if (s > count)
s = count;
BUG_ON(index > limit);
+ /* Make sure we don't terminate early. */
+ sg_unmark_end(&sg[index]);
sg_set_buf(&sg[index++], data, s);
count -= s;
data += s;
}
-
+ if (index-start)
+ sg_mark_end(&sg[index - 1]);
return index-start;
}
@@ -236,12 +239,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit,
s = rest_of_page(data);
if (s > count)
s = count;
+ /* Make sure we don't terminate early. */
+ sg_unmark_end(&sg[index]);
sg_set_page(&sg[index++], pdata[i++], s, data_off);
data_off = 0;
data += s;
count -= s;
nr_pages--;
}
+
+ if (index-start)
+ sg_mark_end(&sg[index - 1]);
return index - start;
}
@@ -256,9 +264,10 @@ static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
int err;
- int in, out;
+ int in, out, out_sgs, in_sgs;
unsigned long flags;
struct virtio_chan *chan = client->trans;
+ struct scatterlist *sgs[2];
p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
@@ -266,14 +275,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
req_retry:
spin_lock_irqsave(&chan->lock, flags);
+ out_sgs = in_sgs = 0;
/* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ if (out)
+ sgs[out_sgs++] = chan->sg;
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+ if (in)
+ sgs[out_sgs + in_sgs++] = chan->sg + out;
- err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+ err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
@@ -289,7 +303,7 @@ req_retry:
} else {
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS,
- "virtio rpc add_buf returned failure\n");
+ "virtio rpc add_sgs returned failure\n");
return -EIO;
}
}
@@ -351,11 +365,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
char *uidata, char *uodata, int inlen,
int outlen, int in_hdr_len, int kern_buf)
{
- int in, out, err;
+ int in, out, err, out_sgs, in_sgs;
unsigned long flags;
int in_nr_pages = 0, out_nr_pages = 0;
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
+ struct scatterlist *sgs[4];
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
@@ -396,13 +411,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
req->status = REQ_STATUS_SENT;
req_retry_pinned:
spin_lock_irqsave(&chan->lock, flags);
+
+ out_sgs = in_sgs = 0;
+
/* out data */
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
- if (out_pages)
+ if (out)
+ sgs[out_sgs++] = chan->sg;
+
+ if (out_pages) {
+ sgs[out_sgs++] = chan->sg + out;
out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
out_pages, out_nr_pages, uodata, outlen);
+ }
+
/*
* Take care of in data
* For example TREAD have 11.
@@ -412,11 +436,17 @@ req_retry_pinned:
*/
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
- if (in_pages)
+ if (in)
+ sgs[out_sgs + in_sgs++] = chan->sg + out;
+
+ if (in_pages) {
+ sgs[out_sgs + in_sgs++] = chan->sg + out + in;
in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
in_pages, in_nr_pages, uidata, inlen);
+ }
- err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+ BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
+ err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
@@ -432,7 +462,7 @@ req_retry_pinned:
} else {
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS,
- "virtio rpc add_buf returned failure\n");
+ "virtio rpc add_sgs returned failure\n");
err = -EIO;
goto err_out;
}
diff --git a/net/Kconfig b/net/Kconfig
index 6f676ab885be..6dfe1c636a80 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -5,6 +5,7 @@
menuconfig NET
bool "Networking support"
select NLATTR
+ select GENERIC_NET_UTILS
---help---
Unless you really know what you are doing, you should say Y here.
The reason is that some programs need kernel networking support even
@@ -217,6 +218,7 @@ source "net/dns_resolver/Kconfig"
source "net/batman-adv/Kconfig"
source "net/openvswitch/Kconfig"
source "net/vmw_vsock/Kconfig"
+source "net/netlink/Kconfig"
config RPS
boolean
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 4a141e3cf076..ef12839a7cfe 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1253,7 +1253,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
goto out;
*uaddr_len = sizeof(struct sockaddr_at);
- memset(&sat.sat_zero, 0, sizeof(sat.sat_zero));
+ memset(&sat, 0, sizeof(sat));
if (peer) {
err = -ENOTCONN;
diff --git a/net/atm/lec.h b/net/atm/lec.h
index a86aff9a3c04..4149db1b7885 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -58,7 +58,7 @@ struct lane2_ops {
* field in h_type field. Data follows immediately after header.
* 2. LLC Data frames whose total length, including LLC field and data,
* but not padding required to meet the minimum data frame length,
- * is less than 1536(0x0600) MUST be encoded by placing that length
+ * is less than ETH_P_802_3_MIN MUST be encoded by placing that length
* in the h_type field. The LLC field follows header immediately.
* 3. LLC data frames longer than this maximum MUST be encoded by placing
* the value 0 in the h_type field.
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 6ac35ff0d6b9..bbb6461a4b7f 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
page = get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
- dev = PDE(file_inode(file))->data;
+ dev = PDE_DATA(file_inode(file));
if (!dev->ops->proc_read)
length = -EINVAL;
else {
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 8d8afb134b3a..fa780b76630e 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -36,6 +36,20 @@ config BATMAN_ADV_DAT
mesh networks. If you think that your network does not need
this option you can safely remove it and save some space.
+config BATMAN_ADV_NC
+ bool "Network Coding"
+ depends on BATMAN_ADV
+ default n
+ help
+ This option enables network coding, a mechanism that aims to
+ increase the overall network throughput by fusing multiple
+ packets in one transmission.
+ Note that interfaces controlled by batman-adv must be manually
+ configured to have promiscuous mode enabled in order to make
+ network coding work.
+ If you think that your network does not need this feature you
+ can safely disable it and save some space.
+
config BATMAN_ADV_DEBUG
bool "B.A.T.M.A.N. debugging"
depends on BATMAN_ADV
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index e45e3b4e32e3..acbac2a9c62f 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
@@ -30,6 +30,7 @@ batman-adv-y += hard-interface.o
batman-adv-y += hash.o
batman-adv-y += icmp_socket.o
batman-adv-y += main.o
+batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
batman-adv-y += ring_buffer.o
batman-adv-y += routing.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a5bb0a769eb9..f680ee101878 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,6 +27,22 @@
#include "hard-interface.h"
#include "send.h"
#include "bat_algo.h"
+#include "network-coding.h"
+
+/**
+ * enum batadv_dup_status - duplicate status
+ * @BATADV_NO_DUP: the packet is not a duplicate
+ * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
+ * neighbor)
+ * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
+ * @BATADV_PROTECTED: originator is currently protected (after reboot)
+ */
+enum batadv_dup_status {
+ BATADV_NO_DUP = 0,
+ BATADV_ORIG_DUP,
+ BATADV_NEIGH_DUP,
+ BATADV_PROTECTED,
+};
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
@@ -649,7 +665,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
const struct batadv_ogm_packet *batadv_ogm_packet,
struct batadv_hard_iface *if_incoming,
const unsigned char *tt_buff,
- int is_duplicate)
+ enum batadv_dup_status dup_status)
{
struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
struct batadv_neigh_node *router = NULL;
@@ -675,7 +691,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
continue;
}
- if (is_duplicate)
+ if (dup_status != BATADV_NO_DUP)
continue;
spin_lock_bh(&tmp_neigh_node->lq_update_lock);
@@ -717,7 +733,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
neigh_node->tq_avg = batadv_ring_buffer_avg(neigh_node->tq_recv);
spin_unlock_bh(&neigh_node->lq_update_lock);
- if (!is_duplicate) {
+ if (dup_status == BATADV_NO_DUP) {
orig_node->last_ttl = batadv_ogm_packet->header.ttl;
neigh_node->last_ttl = batadv_ogm_packet->header.ttl;
}
@@ -901,15 +917,16 @@ out:
return ret;
}
-/* processes a batman packet for all interfaces, adjusts the sequence number and
- * finds out whether it is a duplicate.
- * returns:
- * 1 the packet is a duplicate
- * 0 the packet has not yet been received
- * -1 the packet is old and has been received while the seqno window
- * was protected. Caller should drop it.
+/**
+ * batadv_iv_ogm_update_seqnos - process a batman packet for all interfaces,
+ * adjust the sequence number and find out whether it is a duplicate
+ * @ethhdr: ethernet header of the packet
+ * @batadv_ogm_packet: OGM packet to be considered
+ * @if_incoming: interface on which the OGM packet was received
+ *
+ * Returns duplicate status as enum batadv_dup_status
*/
-static int
+static enum batadv_dup_status
batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
const struct batadv_ogm_packet *batadv_ogm_packet,
const struct batadv_hard_iface *if_incoming)
@@ -917,17 +934,18 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_orig_node *orig_node;
struct batadv_neigh_node *tmp_neigh_node;
- int is_duplicate = 0;
+ int is_dup;
int32_t seq_diff;
int need_update = 0;
- int set_mark, ret = -1;
+ int set_mark;
+ enum batadv_dup_status ret = BATADV_NO_DUP;
uint32_t seqno = ntohl(batadv_ogm_packet->seqno);
uint8_t *neigh_addr;
uint8_t packet_count;
orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig);
if (!orig_node)
- return 0;
+ return BATADV_NO_DUP;
spin_lock_bh(&orig_node->ogm_cnt_lock);
seq_diff = seqno - orig_node->last_real_seqno;
@@ -935,22 +953,29 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
/* signalize caller that the packet is to be dropped. */
if (!hlist_empty(&orig_node->neigh_list) &&
batadv_window_protected(bat_priv, seq_diff,
- &orig_node->batman_seqno_reset))
+ &orig_node->batman_seqno_reset)) {
+ ret = BATADV_PROTECTED;
goto out;
+ }
rcu_read_lock();
hlist_for_each_entry_rcu(tmp_neigh_node,
&orig_node->neigh_list, list) {
- is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits,
- orig_node->last_real_seqno,
- seqno);
-
neigh_addr = tmp_neigh_node->addr;
+ is_dup = batadv_test_bit(tmp_neigh_node->real_bits,
+ orig_node->last_real_seqno,
+ seqno);
+
if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
- tmp_neigh_node->if_incoming == if_incoming)
+ tmp_neigh_node->if_incoming == if_incoming) {
set_mark = 1;
- else
+ if (is_dup)
+ ret = BATADV_NEIGH_DUP;
+ } else {
set_mark = 0;
+ if (is_dup && (ret != BATADV_NEIGH_DUP))
+ ret = BATADV_ORIG_DUP;
+ }
/* if the window moved, set the update flag. */
need_update |= batadv_bit_get_packet(bat_priv,
@@ -970,8 +995,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
orig_node->last_real_seqno = seqno;
}
- ret = is_duplicate;
-
out:
spin_unlock_bh(&orig_node->ogm_cnt_lock);
batadv_orig_node_free_ref(orig_node);
@@ -993,7 +1016,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
int is_broadcast = 0, is_bidirect;
bool is_single_hop_neigh = false;
bool is_from_best_next_hop = false;
- int is_duplicate, sameseq, simlar_ttl;
+ int sameseq, similar_ttl;
+ enum batadv_dup_status dup_status;
uint32_t if_incoming_seqno;
uint8_t *prev_sender;
@@ -1137,10 +1161,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
if (!orig_node)
return;
- is_duplicate = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet,
- if_incoming);
+ dup_status = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet,
+ if_incoming);
- if (is_duplicate == -1) {
+ if (dup_status == BATADV_PROTECTED) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: packet within seqno protection time (sender: %pM)\n",
ethhdr->h_source);
@@ -1185,6 +1209,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
if (!orig_neigh_node)
goto out;
+ /* Update nc_nodes of the originator */
+ batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node,
+ batadv_ogm_packet, is_single_hop_neigh);
+
orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node);
/* drop packet if sender is not a direct neighbor and if we
@@ -1206,11 +1234,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
* seqno and similar ttl as the non-duplicate
*/
sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno);
- simlar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl;
- if (is_bidirect && (!is_duplicate || (sameseq && simlar_ttl)))
+ similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl;
+ if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+ (sameseq && similar_ttl)))
batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr,
batadv_ogm_packet, if_incoming,
- tt_buff, is_duplicate);
+ tt_buff, dup_status);
/* is single hop (direct) neighbor */
if (is_single_hop_neigh) {
@@ -1231,7 +1260,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
goto out_neigh;
}
- if (is_duplicate) {
+ if (dup_status == BATADV_NEIGH_DUP) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: duplicate packet received\n");
goto out_neigh;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 6a4f728680ae..de27b3175cfd 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -341,7 +341,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
}
if (vid != -1)
- skb = vlan_insert_tag(skb, vid);
+ skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid);
skb_reset_mac_header(skb);
skb->protocol = eth_type_trans(skb, soft_iface);
@@ -1067,6 +1067,10 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
group = htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN));
bat_priv->bla.claim_dest.group = group;
+ /* purge everything when bridge loop avoidance is turned off */
+ if (!atomic_read(&bat_priv->bridge_loop_avoidance))
+ oldif = NULL;
+
if (!oldif) {
batadv_bla_purge_claims(bat_priv, NULL, 1);
batadv_bla_purge_backbone_gw(bat_priv, 1);
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 6ae86516db4d..f186a55b23c3 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -32,6 +32,7 @@
#include "icmp_socket.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
+#include "network-coding.h"
static struct dentry *batadv_debugfs;
@@ -310,6 +311,14 @@ struct batadv_debuginfo {
const struct file_operations fops;
};
+#ifdef CONFIG_BATMAN_ADV_NC
+static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
+{
+ struct net_device *net_dev = (struct net_device *)inode->i_private;
+ return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
+}
+#endif
+
#define BATADV_DEBUGINFO(_name, _mode, _open) \
struct batadv_debuginfo batadv_debuginfo_##_name = { \
.attr = { .name = __stringify(_name), \
@@ -348,6 +357,9 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
batadv_transtable_local_open);
static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open);
+#ifdef CONFIG_BATMAN_ADV_NC
+static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
+#endif
static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
&batadv_debuginfo_originators,
@@ -362,6 +374,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
#endif
&batadv_debuginfo_transtable_local,
&batadv_debuginfo_vis_data,
+#ifdef CONFIG_BATMAN_ADV_NC
+ &batadv_debuginfo_nc_nodes,
+#endif
NULL,
};
@@ -431,6 +446,9 @@ int batadv_debugfs_add_meshif(struct net_device *dev)
}
}
+ if (batadv_nc_init_debugfs(bat_priv) < 0)
+ goto rem_attr;
+
return 0;
rem_attr:
debugfs_remove_recursive(bat_priv->debug_dir);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index d54188a112ea..239992021b1d 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -816,7 +816,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
bool ret = false;
struct batadv_dat_entry *dat_entry = NULL;
struct sk_buff *skb_new;
- struct batadv_hard_iface *primary_if = NULL;
if (!atomic_read(&bat_priv->distributed_arp_table))
goto out;
@@ -838,22 +837,31 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
if (dat_entry) {
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
+ /* If the ARP request is destined for a local client the local
+ * client will answer itself. DAT would only generate a
+ * duplicate packet.
+ *
+ * Moreover, if the soft-interface is enslaved into a bridge, an
+ * additional DAT answer may trigger kernel warnings about
+ * a packet coming from the wrong port.
+ */
+ if (batadv_is_my_client(bat_priv, dat_entry->mac_addr)) {
+ ret = true;
goto out;
+ }
skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
- primary_if->soft_iface, ip_dst, hw_src,
+ bat_priv->soft_iface, ip_dst, hw_src,
dat_entry->mac_addr, hw_src);
if (!skb_new)
goto out;
skb_reset_mac_header(skb_new);
skb_new->protocol = eth_type_trans(skb_new,
- primary_if->soft_iface);
+ bat_priv->soft_iface);
bat_priv->stats.rx_packets++;
bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
- primary_if->soft_iface->last_rx = jiffies;
+ bat_priv->soft_iface->last_rx = jiffies;
netif_rx(skb_new);
batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
@@ -866,8 +874,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
out:
if (dat_entry)
batadv_dat_entry_free_ref(dat_entry);
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
return ret;
}
@@ -887,7 +893,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
__be32 ip_src, ip_dst;
uint8_t *hw_src;
struct sk_buff *skb_new;
- struct batadv_hard_iface *primary_if = NULL;
struct batadv_dat_entry *dat_entry = NULL;
bool ret = false;
int err;
@@ -912,12 +917,8 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
if (!dat_entry)
goto out;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
- primary_if->soft_iface, ip_dst, hw_src,
+ bat_priv->soft_iface, ip_dst, hw_src,
dat_entry->mac_addr, hw_src);
if (!skb_new)
@@ -941,8 +942,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
out:
if (dat_entry)
batadv_dat_entry_free_ref(dat_entry);
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
if (ret)
kfree_skb(skb);
return ret;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 34f99a46ec1d..f105219f4a4b 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -500,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_unlock();
if (gw_count == 0)
- seq_printf(seq, "No gateways in range ...\n");
+ seq_puts(seq, "No gateways in range ...\n");
out:
if (primary_if)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 368219e026a9..522243aff2f3 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
batadv_update_min_mtu(hard_iface->soft_iface);
}
+/**
+ * batadv_master_del_slave - remove hard_iface from the current master interface
+ * @slave: the interface enslaved in another master
+ * @master: the master from which slave has to be removed
+ *
+ * Invoke ndo_del_slave on master passing slave as argument. In this way slave
+ * is released and master can correctly change its internal state.
+ *
+ * Returns 0 if master is NULL (there is nothing to unlink), -EBUSY if master
+ * does not provide an ndo_del_slave callback, and the return value of
+ * ndo_del_slave otherwise.
+ */
+static int batadv_master_del_slave(struct batadv_hard_iface *slave,
+ struct net_device *master)
+{
+ int ret;
+
+ /* not enslaved anywhere: nothing to do */
+ if (!master)
+ return 0;
+
+ /* default result when the master offers no way to release a slave */
+ ret = -EBUSY;
+ if (master->netdev_ops->ndo_del_slave)
+ ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev);
+
+ return ret;
+}
+
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
const char *iface_name)
{
struct batadv_priv *bat_priv;
- struct net_device *soft_iface;
+ struct net_device *soft_iface, *master;
__be16 ethertype = __constant_htons(ETH_P_BATMAN);
int ret;
@@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
if (!atomic_inc_not_zero(&hard_iface->refcount))
goto out;
- /* hard-interface is part of a bridge */
- if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT)
- pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n",
- hard_iface->net_dev->name);
-
soft_iface = dev_get_by_name(&init_net, iface_name);
if (!soft_iface) {
@@ -347,12 +366,24 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
goto err_dev;
}
+ /* check if the interface is enslaved in another virtual one and
+ * in that case unlink it first
+ */
+ master = netdev_master_upper_dev_get(hard_iface->net_dev);
+ ret = batadv_master_del_slave(hard_iface, master);
+ if (ret)
+ goto err_dev;
+
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
+ ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
+ if (ret)
+ goto err_dev;
+
ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
if (ret < 0)
- goto err_dev;
+ goto err_upper;
hard_iface->if_num = bat_priv->num_ifaces;
bat_priv->num_ifaces++;
@@ -362,7 +393,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
bat_priv->num_ifaces--;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
- goto err_dev;
+ goto err_upper;
}
hard_iface->batman_adv_ptype.type = ethertype;
@@ -401,14 +432,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
out:
return 0;
+err_upper:
+ netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface);
err_dev:
+ hard_iface->soft_iface = NULL;
dev_put(soft_iface);
err:
batadv_hardif_free_ref(hard_iface);
return ret;
}
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+ enum batadv_hard_if_cleanup autodel)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hard_iface *primary_if = NULL;
@@ -446,9 +481,10 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
dev_put(hard_iface->soft_iface);
/* nobody uses this interface anymore */
- if (!bat_priv->num_ifaces)
- batadv_softif_destroy(hard_iface->soft_iface);
+ if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
+ batadv_softif_destroy_sysfs(hard_iface->soft_iface);
+ netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
hard_iface->soft_iface = NULL;
batadv_hardif_free_ref(hard_iface);
@@ -533,7 +569,8 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
/* first deactivate interface */
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
- batadv_hardif_disable_interface(hard_iface);
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_AUTO);
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
return;
@@ -563,6 +600,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_priv *bat_priv;
+ if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
+ batadv_sysfs_add_meshif(net_dev);
+ return NOTIFY_DONE;
+ }
+
hard_iface = batadv_hardif_get_by_netdev(net_dev);
if (!hard_iface && event == NETDEV_REGISTER)
hard_iface = batadv_hardif_add_interface(net_dev);
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 308437d52e22..49892881a7c5 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -29,13 +29,24 @@ enum batadv_hard_if_state {
BATADV_IF_I_WANT_YOU,
};
+/**
+ * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal
+ * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
+ * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
+ */
+enum batadv_hard_if_cleanup {
+ BATADV_IF_CLEANUP_KEEP,
+ BATADV_IF_CLEANUP_AUTO,
+};
+
extern struct notifier_block batadv_hard_if_notifier;
struct batadv_hard_iface*
batadv_hardif_get_by_netdev(const struct net_device *net_dev);
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
const char *iface_name);
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface);
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+ enum batadv_hard_if_cleanup autodel);
void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fa563e497c48..51aafd669cbb 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -35,6 +35,7 @@
#include "vis.h"
#include "hash.h"
#include "bat_algo.h"
+#include "network-coding.h"
/* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -70,6 +71,7 @@ static int __init batadv_init(void)
batadv_debugfs_init();
register_netdevice_notifier(&batadv_hard_if_notifier);
+ rtnl_link_register(&batadv_link_ops);
pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -80,6 +82,7 @@ static int __init batadv_init(void)
static void __exit batadv_exit(void)
{
batadv_debugfs_destroy();
+ rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
batadv_hardif_remove_interfaces();
@@ -135,6 +138,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
if (ret < 0)
goto err;
+ ret = batadv_nc_init(bat_priv);
+ if (ret < 0)
+ goto err;
+
atomic_set(&bat_priv->gw.reselect, 0);
atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
@@ -156,19 +163,35 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_vis_quit(bat_priv);
batadv_gw_node_purge(bat_priv);
- batadv_originator_free(bat_priv);
+ batadv_nc_free(bat_priv);
+ batadv_dat_free(bat_priv);
+ batadv_bla_free(bat_priv);
+ /* Free the TT and the originator tables only after having terminated
+ * all the other depending components which may use these structures for
+ * their purposes.
+ */
batadv_tt_free(bat_priv);
- batadv_bla_free(bat_priv);
-
- batadv_dat_free(bat_priv);
+ /* Since the originator table clean up routine is accessing the TT
+ * tables as well, it has to be invoked after the TT tables have been
+ * freed and marked as empty. This ensures that no cleanup RCU callbacks
+ * accessing the TT data are scheduled for later execution.
+ */
+ batadv_originator_free(bat_priv);
free_percpu(bat_priv->bat_counters);
+ bat_priv->bat_counters = NULL;
atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
}
+/**
+ * batadv_is_my_mac - check if the given mac address belongs to any of the real
+ * interfaces in the current mesh
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the address to check
+ */
int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
{
const struct batadv_hard_iface *hard_iface;
@@ -414,7 +437,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
{
struct batadv_algo_ops *bat_algo_ops;
- seq_printf(seq, "Available routing algorithms:\n");
+ seq_puts(seq, "Available routing algorithms:\n");
hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
seq_printf(seq, "%s\n", bat_algo_ops->name);
@@ -461,7 +484,7 @@ static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
char *algo_name = (char *)val;
size_t name_len = strlen(algo_name);
- if (algo_name[name_len - 1] == '\n')
+ if (name_len > 0 && algo_name[name_len - 1] == '\n')
algo_name[name_len - 1] = '\0';
bat_algo_ops = batadv_algo_get(algo_name);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d40910dfc8ea..59a0d6af15c8 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.1.0"
+#define BATADV_SOURCE_VERSION "2013.2.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -105,6 +105,8 @@
#define BATADV_RESET_PROTECTION_MS 30000
#define BATADV_EXPECTED_SEQNO_RANGE 65536
+#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */
+
enum batadv_mesh_state {
BATADV_MESH_INACTIVE,
BATADV_MESH_ACTIVE,
@@ -150,6 +152,7 @@ enum batadv_uev_type {
#include <linux/percpu.h>
#include <linux/slab.h>
#include <net/sock.h> /* struct sock */
+#include <net/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/seq_file.h>
#include "types.h"
@@ -185,6 +188,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
* @BATADV_DBG_TT: translation table messages
* @BATADV_DBG_BLA: bridge loop avoidance messages
* @BATADV_DBG_DAT: ARP snooping and DAT related messages
+ * @BATADV_DBG_NC: network coding related messages
* @BATADV_DBG_ALL: the union of all the above log levels
*/
enum batadv_dbg_level {
@@ -193,7 +197,8 @@ enum batadv_dbg_level {
BATADV_DBG_TT = BIT(2),
BATADV_DBG_BLA = BIT(3),
BATADV_DBG_DAT = BIT(4),
- BATADV_DBG_ALL = 31,
+ BATADV_DBG_NC = BIT(5),
+ BATADV_DBG_ALL = 63,
};
#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -298,4 +303,10 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv,
return sum;
}
+/* Define a macro to reach the control buffer of the skb. The members of the
+ * control buffer are defined in struct batadv_skb_cb in types.h.
+ * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
+ */
+#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
+
#endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
new file mode 100644
index 000000000000..e84629ece9b7
--- /dev/null
+++ b/net/batman-adv/network-coding.c
@@ -0,0 +1,1826 @@
+/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors:
+ *
+ * Martin Hundebøll, Jeppe Ledet-Pedersen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/debugfs.h>
+
+#include "main.h"
+#include "hash.h"
+#include "network-coding.h"
+#include "send.h"
+#include "originator.h"
+#include "hard-interface.h"
+#include "routing.h"
+
+static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
+static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
+
+static void batadv_nc_worker(struct work_struct *work);
+static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
+
+/**
+ * batadv_nc_start_timer - initialise the nc periodic worker
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
+{
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work,
+ msecs_to_jiffies(10));
+}
+
+/**
+ * batadv_nc_init - initialise coding hash table and start house keeping
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Allocates the coding and the decoding hash table, registers the receive
+ * handler for BATADV_CODED packets and queues the periodic nc worker. If one
+ * of the hash tables already exists only the flush/purge timestamps are reset.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation or the handler registration
+ * fails. Hash tables allocated before the failure are not released here.
+ * NOTE(review): presumably the caller cleans them up via batadv_nc_free() -
+ * confirm.
+ */
+int batadv_nc_init(struct batadv_priv *bat_priv)
+{
+	bat_priv->nc.timestamp_fwd_flush = jiffies;
+	bat_priv->nc.timestamp_sniffed_purge = jiffies;
+
+	if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash)
+		return 0;
+
+	bat_priv->nc.coding_hash = batadv_hash_new(128);
+	if (!bat_priv->nc.coding_hash)
+		goto err;
+
+	batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
+				   &batadv_nc_coding_hash_lock_class_key);
+
+	bat_priv->nc.decoding_hash = batadv_hash_new(128);
+	if (!bat_priv->nc.decoding_hash)
+		goto err;
+
+	/* the decoding hash needs its own lock class; assigning the decoding
+	 * key to the coding hash would overwrite the class set above and leave
+	 * the decoding hash without one
+	 */
+	batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
+				   &batadv_nc_decoding_hash_lock_class_key);
+
+	/* Register our packet type */
+	if (batadv_recv_handler_register(BATADV_CODED,
+					 batadv_nc_recv_coded_packet) < 0)
+		goto err;
+
+	INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
+	batadv_nc_start_timer(bat_priv);
+
+	return 0;
+
+err:
+	return -ENOMEM;
+}
+
+/**
+ * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Sets the default values of the network coding tunables.
+ */
+void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
+{
+ /* NOTE(review): presumably 1 means network coding is enabled - confirm */
+ atomic_set(&bat_priv->network_coding, 1);
+ /* lower bound for the OGM tq checked in batadv_can_nc_with_orig() */
+ bat_priv->nc.min_tq = 200;
+ /* timeout handed to batadv_has_timed_out() before a queued packet is
+ * forwarded uncoded; presumably milliseconds - TODO confirm
+ */
+ bat_priv->nc.max_fwd_delay = 10;
+ /* timeout handed to batadv_has_timed_out() before a sniffed packet is
+ * dropped; presumably milliseconds - TODO confirm
+ */
+ bat_priv->nc.max_buffer_time = 200;
+}
+
+/**
+ * batadv_nc_init_orig - initialise the nc fields of an orig_node
+ * @orig_node: the orig_node which is going to be initialised
+ */
+void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
+{
+ INIT_LIST_HEAD(&orig_node->in_coding_list);
+ INIT_LIST_HEAD(&orig_node->out_coding_list);
+ spin_lock_init(&orig_node->in_coding_list_lock);
+ spin_lock_init(&orig_node->out_coding_list_lock);
+}
+
+/**
+ * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
+ * its refcount on the orig_node
+ * @rcu: rcu pointer of the nc node
+ */
+static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
+{
+ struct batadv_nc_node *nc_node;
+
+ nc_node = container_of(rcu, struct batadv_nc_node, rcu);
+ batadv_orig_node_free_ref(nc_node->orig_node);
+ kfree(nc_node);
+}
+
+/**
+ * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
+ * frees it
+ * @nc_node: the nc node to free
+ */
+static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
+{
+ if (atomic_dec_and_test(&nc_node->refcount))
+ call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
+}
+
+/**
+ * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly
+ * frees it
+ * @nc_path: the nc path to free
+ */
+static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
+{
+ if (atomic_dec_and_test(&nc_path->refcount))
+ kfree_rcu(nc_path, rcu);
+}
+
+/**
+ * batadv_nc_packet_free - frees nc packet
+ * @nc_packet: the nc packet to free
+ */
+static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
+{
+ if (nc_packet->skb)
+ kfree_skb(nc_packet->skb);
+
+ batadv_nc_path_free_ref(nc_packet->nc_path);
+ kfree(nc_packet);
+}
+
+/**
+ * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_node: the nc node to check
+ *
+ * Returns true if the entry has to be purged now, false otherwise
+ */
+static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
+ struct batadv_nc_node *nc_node)
+{
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ return true;
+
+ return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT);
+}
+
+/**
+ * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path to check
+ *
+ * Returns true if the entry has to be purged now, false otherwise. An entry is
+ * always reported as purgeable while the mesh is not in the active state.
+ */
+static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
+ struct batadv_nc_path *nc_path)
+{
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ return true;
+
+ /* purge the path when no packets have been added for 10 times the
+ * max_fwd_delay time
+ */
+ return batadv_has_timed_out(nc_path->last_valid,
+ bat_priv->nc.max_fwd_delay * 10);
+}
+
+/**
+ * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path to check
+ *
+ * Returns true if the entry has to be purged now, false otherwise. An entry is
+ * always reported as purgeable while the mesh is not in the active state.
+ */
+static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
+ struct batadv_nc_path *nc_path)
+{
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ return true;
+
+ /* purge the path when no packets have been added for 10 times the
+ * max_buffer_time
+ */
+ return batadv_has_timed_out(nc_path->last_valid,
+ bat_priv->nc.max_buffer_time*10);
+}
+
+/**
+ * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale
+ * entries
+ * @bat_priv: the bat priv with all the soft interface information
+ * @list: list of nc nodes
+ * @lock: nc node list lock
+ * @to_purge: function in charge to decide whether an entry has to be purged or
+ * not. This function takes the nc node as argument and has to return
+ * a boolean value: true if the entry has to be deleted, false
+ * otherwise
+ */
+static void
+batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
+ struct list_head *list,
+ spinlock_t *lock,
+ bool (*to_purge)(struct batadv_priv *,
+ struct batadv_nc_node *))
+{
+ struct batadv_nc_node *nc_node, *nc_node_tmp;
+
+ /* For each nc_node in list */
+ spin_lock_bh(lock);
+ list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) {
+ /* if an helper function has been passed as parameter,
+ * ask it if the entry has to be purged or not
+ */
+ if (to_purge && !to_purge(bat_priv, nc_node))
+ continue;
+
+ batadv_dbg(BATADV_DBG_NC, bat_priv,
+ "Removing nc_node %pM -> %pM\n",
+ nc_node->addr, nc_node->orig_node->orig);
+ list_del_rcu(&nc_node->list);
+ batadv_nc_node_free_ref(nc_node);
+ }
+ spin_unlock_bh(lock);
+}
+
+/**
+ * batadv_nc_purge_orig - purges all nc node data attached of the given
+ * originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig_node with the nc node entries to be purged
+ * @to_purge: function in charge to decide whether an entry has to be purged or
+ * not. This function takes the nc node as argument and has to return
+ * a boolean value: true if the entry has to be deleted, false
+ * otherwise
+ */
+void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ bool (*to_purge)(struct batadv_priv *,
+ struct batadv_nc_node *))
+{
+ /* Check ingoing nc_node's of this orig_node */
+ batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list,
+ &orig_node->in_coding_list_lock,
+ to_purge);
+
+ /* Check outgoing nc_node's of this orig_node */
+ batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list,
+ &orig_node->out_coding_list_lock,
+ to_purge);
+}
+
+/**
+ * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they
+ * have timed out nc nodes
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct hlist_head *head;
+ struct batadv_orig_node *orig_node;
+ uint32_t i;
+
+ if (!hash)
+ return;
+
+ /* For each orig_node */
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry)
+ batadv_nc_purge_orig(bat_priv, orig_node,
+ batadv_nc_to_purge_nc_node);
+ rcu_read_unlock();
+ }
+}
+
+/**
+ * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove
+ *  unused ones
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: hash table containing the nc paths to check
+ * @to_purge: function in charge to decide whether an entry has to be purged or
+ *	      not. This function takes the nc path as argument and has to return
+ *	      a boolean value: true if the entry has to be deleted, false
+ *	      otherwise. If NULL, every nc path is a candidate for removal.
+ *
+ * An nc path that still holds queued packets is never removed, even if
+ * to_purge selected it; it is skipped with a rate limited warning instead.
+ */
+static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
+				  struct batadv_hashtable *hash,
+				  bool (*to_purge)(struct batadv_priv *,
+						   struct batadv_nc_path *))
+{
+	struct hlist_head *head;
+	struct hlist_node *node_tmp;
+	struct batadv_nc_path *nc_path;
+	spinlock_t *lock; /* Protects lists in hash */
+	uint32_t i;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+		lock = &hash->list_locks[i];
+
+		/* For each nc_path in this bin */
+		spin_lock_bh(lock);
+		hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) {
+			/* if an helper function has been passed as parameter,
+			 * ask it if the entry has to be purged or not
+			 */
+			if (to_purge && !to_purge(bat_priv, nc_path))
+				continue;
+
+			/* purging an non-empty nc_path should never happen, but
+			 * is observed under high CPU load. Delay the purging
+			 * until next iteration to allow the packet_list to be
+			 * emptied first.
+			 *
+			 * The non-empty list is the unexpected case, so the
+			 * hint wraps the negated test.
+			 */
+			if (unlikely(!list_empty(&nc_path->packet_list))) {
+				net_ratelimited_function(printk,
+							 KERN_WARNING
+							 "Skipping free of non-empty nc_path (%pM -> %pM)!\n",
+							 nc_path->prev_hop,
+							 nc_path->next_hop);
+				continue;
+			}
+
+			/* nc_path is unused, so remove it */
+			batadv_dbg(BATADV_DBG_NC, bat_priv,
+				   "Remove nc_path %pM -> %pM\n",
+				   nc_path->prev_hop, nc_path->next_hop);
+			hlist_del_rcu(&nc_path->hash_entry);
+			batadv_nc_path_free_ref(nc_path);
+		}
+		spin_unlock_bh(lock);
+	}
+}
+
+/**
+ * batadv_nc_hash_key_gen - computes the nc_path hash key
+ * @key: buffer to hold the final hash key
+ * @src: source ethernet mac address going into the hash key
+ * @dst: destination ethernet mac address going into the hash key
+ */
+static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
+ const char *dst)
+{
+ memcpy(key->prev_hop, src, sizeof(key->prev_hop));
+ memcpy(key->next_hop, dst, sizeof(key->next_hop));
+}
+
+/**
+ * batadv_nc_hash_choose - compute the hash value for an nc path
+ * @data: data to hash
+ * @size: size of the hash table
+ *
+ * Returns the selected index in the hash table for the given data.
+ */
+static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
+{
+ const struct batadv_nc_path *nc_path = data;
+ uint32_t hash = 0;
+
+ hash = batadv_hash_bytes(hash, &nc_path->prev_hop,
+ sizeof(nc_path->prev_hop));
+ hash = batadv_hash_bytes(hash, &nc_path->next_hop,
+ sizeof(nc_path->next_hop));
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash % size;
+}
+
+/**
+ * batadv_nc_hash_compare - match an nc path hash entry against a search key
+ * @node: hash list node embedded in the stored nc path
+ * @data2: nc path used as key (only prev_hop and next_hop are read)
+ *
+ * Returns 1 if both the previous hop and the next hop addresses are equal,
+ * 0 otherwise
+ */
+static int batadv_nc_hash_compare(const struct hlist_node *node,
+				  const void *data2)
+{
+	const struct batadv_nc_path *key = data2;
+	const struct batadv_nc_path *entry;
+
+	entry = container_of(node, struct batadv_nc_path, hash_entry);
+
+	/* both hop addresses have to match; the next hop is only looked at
+	 * when the previous hop already agrees
+	 */
+	return memcmp(entry->prev_hop, key->prev_hop,
+		      sizeof(entry->prev_hop)) == 0 &&
+	       memcmp(entry->next_hop, key->next_hop,
+		      sizeof(entry->next_hop)) == 0;
+}
+
+/**
+ * batadv_nc_hash_find - search for an existing nc path and return it
+ * @hash: hash table containing the nc path
+ * @data: search key
+ *
+ * Returns the nc_path if found, NULL otherwise. The refcount of a returned
+ * nc_path has been incremented; entries whose refcount already dropped to zero
+ * are skipped. NULL is also returned if hash is NULL.
+ */
+static struct batadv_nc_path *
+batadv_nc_hash_find(struct batadv_hashtable *hash,
+		    void *data)
+{
+	struct hlist_head *head;
+	struct batadv_nc_path *nc_path, *nc_path_tmp = NULL;
+	uint32_t index; /* same type as batadv_nc_hash_choose() returns */
+
+	if (!hash)
+		return NULL;
+
+	index = batadv_nc_hash_choose(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
+		if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
+			continue;
+
+		/* the entry is on its way out, keep looking */
+		if (!atomic_inc_not_zero(&nc_path->refcount))
+			continue;
+
+		nc_path_tmp = nc_path;
+		break;
+	}
+	rcu_read_unlock();
+
+	return nc_path_tmp;
+}
+
+/**
+ * batadv_nc_send_packet - send non-coded packet and free nc_packet struct
+ * @nc_packet: the nc packet to send
+ *
+ * The skb is handed to batadv_send_skb_packet() using the incoming interface of
+ * the stored neighbor and the next hop of the nc path as destination. The
+ * nc_packet itself is released afterwards and must not be used by the caller.
+ */
+static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
+{
+ batadv_send_skb_packet(nc_packet->skb,
+ nc_packet->neigh_node->if_incoming,
+ nc_packet->nc_path->next_hop);
+ /* batadv_nc_packet_free() frees a non-NULL skb; clear the pointer so
+ * the skb just passed to the send routine is not freed a second time
+ * (presumably the send routine consumes it - confirm)
+ */
+ nc_packet->skb = NULL;
+ batadv_nc_packet_free(nc_packet);
+}
+
+/**
+ * batadv_nc_sniffed_purge - drop a sniffed nc_packet once it is too old
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path the packet belongs to
+ * @nc_packet: the nc packet to be checked
+ *
+ * A sniffed (overheard) nc_packet is unlinked from its queue and freed when
+ * its buffering timeout expired or when the mesh is no longer active. Has to
+ * be called with the appropriate locks.
+ *
+ * Returns true if the packet has been purged and false if it is still within
+ * its buffering time.
+ */
+static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
+				    struct batadv_nc_path *nc_path,
+				    struct batadv_nc_packet *nc_packet)
+{
+	unsigned long max_age = bat_priv->nc.max_buffer_time;
+
+	/* The queue is filled at its tail, hence a packet which is still
+	 * fresh means that everything behind it is fresh as well
+	 */
+	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
+	    !batadv_has_timed_out(nc_packet->timestamp, max_age))
+		return false;
+
+	/* too old (or mesh going down): unlink and release the packet */
+	list_del(&nc_packet->list);
+	batadv_nc_packet_free(nc_packet);
+
+	return true;
+}
+
+/**
+ * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path the packet belongs to
+ * @nc_packet: the nc packet to be checked
+ *
+ * Checks whether the given nc packet has hit its forward timeout. If so, the
+ * packet is no longer delayed, immediately sent and the entry deleted from the
+ * queue. Has to be called with the appropriate locks.
+ *
+ * Returns false as soon as the entry in the fifo queue has not been timed out
+ * yet and true otherwise.
+ */
+static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
+ struct batadv_nc_path *nc_path,
+ struct batadv_nc_packet *nc_packet)
+{
+ unsigned long timeout = bat_priv->nc.max_fwd_delay;
+
+ /* Packets are added to tail, so the remaining packets did not time
+ * out and we can stop processing the current queue
+ */
+ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
+ !batadv_has_timed_out(nc_packet->timestamp, timeout))
+ return false;
+
+ /* Send packet */
+ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
+ batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
+ nc_packet->skb->len + ETH_HLEN);
+ list_del(&nc_packet->list);
+ batadv_nc_send_packet(nc_packet);
+
+ return true;
+}
+
+/**
+ * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed out
+ *  nc packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: to be processed hash table
+ * @process_fn: Function called to process given nc packet. Should return true
+ *	        to encourage this function to proceed with the next packet.
+ *	        Otherwise the rest of the current queue is skipped.
+ *
+ * process_fn is invoked with the packet_list_lock of the corresponding nc path
+ * held. Nothing is done if hash is NULL.
+ */
+static void
+batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
+			   struct batadv_hashtable *hash,
+			   bool (*process_fn)(struct batadv_priv *,
+					      struct batadv_nc_path *,
+					      struct batadv_nc_packet *))
+{
+	struct hlist_head *head;
+	struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
+	struct batadv_nc_path *nc_path;
+	bool ret;
+	uint32_t i; /* unsigned like the other hash walks in this file */
+
+	if (!hash)
+		return;
+
+	/* Loop hash table bins */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		/* Loop coding paths */
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
+			/* Loop packets */
+			spin_lock_bh(&nc_path->packet_list_lock);
+			list_for_each_entry_safe(nc_packet, nc_packet_tmp,
+						 &nc_path->packet_list, list) {
+				ret = process_fn(bat_priv, nc_path, nc_packet);
+				if (!ret)
+					break;
+			}
+			spin_unlock_bh(&nc_path->packet_list_lock);
+		}
+		rcu_read_unlock();
+	}
+}
+
+/**
+ * batadv_nc_worker - periodic task for house keeping related to network coding
+ * @work: kernel work struct
+ *
+ * Purges stale nc nodes and unused nc paths, flushes delayed packets from the
+ * coding hash and drops expired sniffed packets from the decoding hash once
+ * the respective interval elapsed, then re-arms itself.
+ */
+static void batadv_nc_worker(struct work_struct *work)
+{
+ struct delayed_work *delayed_work;
+ struct batadv_priv_nc *priv_nc;
+ struct batadv_priv *bat_priv;
+ unsigned long timeout;
+
+ /* walk back from the embedded work item to the owning bat_priv */
+ delayed_work = container_of(work, struct delayed_work, work);
+ priv_nc = container_of(delayed_work, struct batadv_priv_nc, work);
+ bat_priv = container_of(priv_nc, struct batadv_priv, nc);
+
+ /* drop timed out nc nodes and nc paths without recent packets */
+ batadv_nc_purge_orig_hash(bat_priv);
+ batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash,
+ batadv_nc_to_purge_nc_path_coding);
+ batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash,
+ batadv_nc_to_purge_nc_path_decoding);
+
+ timeout = bat_priv->nc.max_fwd_delay;
+
+ /* forward packets which waited long enough for a coding opportunity */
+ if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) {
+ batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash,
+ batadv_nc_fwd_flush);
+ bat_priv->nc.timestamp_fwd_flush = jiffies;
+ }
+
+ /* release sniffed packets which exceeded their buffering time */
+ if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge,
+ bat_priv->nc.max_buffer_time)) {
+ batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash,
+ batadv_nc_sniffed_purge);
+ bat_priv->nc.timestamp_sniffed_purge = jiffies;
+ }
+
+ /* Schedule a new check */
+ batadv_nc_start_timer(bat_priv);
+}
+
+/**
+ * batadv_can_nc_with_orig - checks whether the given orig node is suitable for
+ *  coding or not
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: neighboring orig node which may be used as nc candidate
+ * @ogm_packet: incoming ogm packet also used for the checks
+ *
+ * Returns true only if all of the following hold:
+ *  1) The OGM carries the most recent sequence number known for orig_node.
+ *  2) The TTL was decremented by exactly one.
+ *  3) The OGM was received from the first hop (orig equals prev_sender).
+ *  4) The TQ value of the OGM is at least bat_priv->nc.min_tq.
+ */
+static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
+				    struct batadv_orig_node *orig_node,
+				    struct batadv_ogm_packet *ogm_packet)
+{
+	/* Checks are evaluated in the order listed above and short-circuit */
+	return orig_node->last_real_seqno == ntohl(ogm_packet->seqno) &&
+	       orig_node->last_ttl == ogm_packet->header.ttl + 1 &&
+	       batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender) &&
+	       ogm_packet->tq >= bat_priv->nc.min_tq;
+}
+
+/**
+ * batadv_nc_find_nc_node - search for an existing nc node and return it
+ * @orig_node: orig node originating the ogm packet
+ * @orig_neigh_node: neighboring orig node from which we received the ogm packet
+ *  (can be equal to orig_node)
+ * @in_coding: traverse incoming or outgoing network coding list
+ *
+ * The list is walked under rcu_read_lock(); a match is only returned if its
+ * refcount could be incremented, so the caller owns one reference.
+ *
+ * Returns the nc_node if found, NULL otherwise.
+ */
+static struct batadv_nc_node
+*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
+			struct batadv_orig_node *orig_neigh_node,
+			bool in_coding)
+{
+	struct list_head *nc_list = in_coding ?
+				    &orig_neigh_node->in_coding_list :
+				    &orig_neigh_node->out_coding_list;
+	struct batadv_nc_node *candidate, *found = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(candidate, nc_list, list) {
+		/* First address match we can still grab a reference on wins */
+		if (batadv_compare_eth(candidate->addr, orig_node->orig) &&
+		    atomic_inc_not_zero(&candidate->refcount)) {
+			found = candidate;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+/**
+ * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was
+ *  not found
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig node originating the ogm packet
+ * @orig_neigh_node: neighboring orig node from which we received the ogm packet
+ *  (can be equal to orig_node)
+ * @in_coding: traverse incoming or outgoing network coding list
+ *
+ * The lookup and the insertion are done while holding the lock of the
+ * selected list. Otherwise two concurrent callers could both miss the entry
+ * and both append a new nc_node for the same address.
+ *
+ * A newly created node starts with a refcount of 2: one reference for the
+ * list and one for the caller. A node that already existed is returned with
+ * the reference taken by batadv_nc_find_nc_node().
+ *
+ * Returns the nc_node if found or created, NULL in case of an error.
+ */
+static struct batadv_nc_node
+*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
+		       struct batadv_orig_node *orig_node,
+		       struct batadv_orig_node *orig_neigh_node,
+		       bool in_coding)
+{
+	struct batadv_nc_node *nc_node;
+	spinlock_t *lock; /* protects the list selected by in_coding */
+	struct list_head *list;
+
+	/* Select ingoing or outgoing coding node */
+	if (in_coding) {
+		lock = &orig_neigh_node->in_coding_list_lock;
+		list = &orig_neigh_node->in_coding_list;
+	} else {
+		lock = &orig_neigh_node->out_coding_list_lock;
+		list = &orig_neigh_node->out_coding_list;
+	}
+
+	spin_lock_bh(lock);
+
+	/* Check if nc_node is already added */
+	nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding);
+
+	/* Node found */
+	if (nc_node)
+		goto unlock;
+
+	nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC);
+	if (!nc_node)
+		goto unlock;
+
+	/* The new node keeps a reference on orig_neigh_node */
+	if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) {
+		kfree(nc_node);
+		nc_node = NULL;
+		goto unlock;
+	}
+
+	/* Initialize nc_node */
+	INIT_LIST_HEAD(&nc_node->list);
+	memcpy(nc_node->addr, orig_node->orig, ETH_ALEN);
+	nc_node->orig_node = orig_neigh_node;
+	atomic_set(&nc_node->refcount, 2);
+
+	batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n",
+		   nc_node->addr, nc_node->orig_node->orig);
+
+	/* Add nc_node to orig_node */
+	list_add_tail_rcu(&nc_node->list, list);
+
+unlock:
+	spin_unlock_bh(lock);
+
+	return nc_node;
+}
+
+/**
+ * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs
+ *  (best called on incoming OGMs)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig node originating the ogm packet
+ * @orig_neigh_node: neighboring orig node from which we received the ogm packet
+ *  (can be equal to orig_node)
+ * @ogm_packet: incoming ogm packet
+ * @is_single_hop_neigh: orig_node is a single hop neighbor
+ *
+ * Refreshes the last_seen timestamp of the in/out nc nodes for this pair,
+ * creating them if needed. The references obtained for that purpose are
+ * dropped again before returning.
+ */
+void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
+			      struct batadv_orig_node *orig_node,
+			      struct batadv_orig_node *orig_neigh_node,
+			      struct batadv_ogm_packet *ogm_packet,
+			      int is_single_hop_neigh)
+{
+	struct batadv_nc_node *in_node, *out_node;
+
+	/* Nothing to do while network coding is switched off */
+	if (!atomic_read(&bat_priv->network_coding))
+		return;
+
+	/* Only 'good' neighbors and single hop neighbors are accepted */
+	if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) &&
+	    !is_single_hop_neigh)
+		return;
+
+	/* orig_node becomes an in_nc_node on the hop */
+	in_node = batadv_nc_get_nc_node(bat_priv, orig_node,
+					orig_neigh_node, true);
+	if (!in_node)
+		return;
+
+	in_node->last_seen = jiffies;
+
+	/* the hop becomes an out_nc_node on orig_node */
+	out_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node,
+					 orig_node, false);
+	if (out_node)
+		out_node->last_seen = jiffies;
+
+	batadv_nc_node_free_ref(in_node);
+	if (out_node)
+		batadv_nc_node_free_ref(out_node);
+}
+
+/**
+ * batadv_nc_get_path - get existing nc_path or allocate a new one
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: hash table containing the nc path
+ * @src: ethernet source address - first half of the nc path search key
+ * @dst: ethernet destination address - second half of the nc path search key
+ *
+ * An existing path gets its last_valid timestamp refreshed. A new path is
+ * created with a refcount of 2 and inserted into @hash.
+ *
+ * Returns pointer to nc_path if the path was found or created, returns NULL
+ * on error.
+ */
+static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
+						 struct batadv_hashtable *hash,
+						 uint8_t *src,
+						 uint8_t *dst)
+{
+	struct batadv_nc_path key, *path;
+
+	batadv_nc_hash_key_gen(&key, src, dst);
+
+	/* Reuse a known path and push back its removal */
+	path = batadv_nc_hash_find(hash, (void *)&key);
+	if (path) {
+		path->last_valid = jiffies;
+		return path;
+	}
+
+	/* Unknown so far, so set up a fresh entry */
+	path = kzalloc(sizeof(*path), GFP_ATOMIC);
+	if (!path)
+		return NULL;
+
+	INIT_LIST_HEAD(&path->packet_list);
+	spin_lock_init(&path->packet_list_lock);
+	atomic_set(&path->refcount, 2);
+	path->last_valid = jiffies;
+	memcpy(path->next_hop, dst, ETH_ALEN);
+	memcpy(path->prev_hop, src, ETH_ALEN);
+
+	batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n",
+		   path->prev_hop,
+		   path->next_hop);
+
+	/* Publish the entry; only a negative result is treated as failure */
+	if (batadv_hash_add(hash, batadv_nc_hash_compare,
+			    batadv_nc_hash_choose, &key,
+			    &path->hash_entry) < 0) {
+		kfree(path);
+		return NULL;
+	}
+
+	return path;
+}
+
+/**
+ * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair
+ *  selection of a receiver with slightly lower TQ than the other
+ * @tq: to be weighted tq value
+ *
+ * The estimated packet loss (BATADV_TQ_MAX_VALUE - @tq) is scaled by a random
+ * byte and normalized by BATADV_TQ_MAX_VALUE. The intermediate product needs
+ * more than 8 bits, so it is kept in an unsigned int. Storing it in a uint8_t
+ * before the division truncates it and leaves a normalized loss of 0 or 1,
+ * which defeats the randomization.
+ *
+ * Returns the randomized tq value.
+ */
+static uint8_t batadv_nc_random_weight_tq(uint8_t tq)
+{
+	uint8_t rand_val;
+	unsigned int rand_loss;
+
+	get_random_bytes(&rand_val, sizeof(rand_val));
+
+	/* randomize the estimated packet loss (max TQ - estimated TQ) */
+	rand_loss = (unsigned int)rand_val * (BATADV_TQ_MAX_VALUE - tq);
+
+	/* normalize the randomized packet loss */
+	rand_loss /= BATADV_TQ_MAX_VALUE;
+
+	/* convert to (randomized) estimated tq again */
+	return BATADV_TQ_MAX_VALUE - rand_loss;
+}
+
+/**
+ * batadv_nc_memxor - XOR destination with source
+ * @dst: byte array to XOR into
+ * @src: byte array to XOR from
+ * @len: number of bytes to process in both arrays
+ */
+static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
+{
+	const char *src_end = src + len;
+
+	while (src != src_end)
+		*dst++ ^= *src++;
+}
+
+/**
+ * batadv_nc_code_packets - code a received unicast_packet with an nc packet
+ * into a coded_packet and send it
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to forward
+ * @ethhdr: pointer to the ethernet header inside the skb
+ * @nc_packet: structure containing the packet the skb can be coded with
+ * @neigh_node: next hop to forward packet to
+ *
+ * The shorter of the two skbs is XORed into the longer one, which is then
+ * extended by the difference between the coded and the unicast header and
+ * sent. On success the shorter skb and nc_packet are freed here. On failure
+ * neither skb nor nc_packet is released by this function.
+ *
+ * Returns true if both packets are consumed, false otherwise.
+ */
+static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ struct ethhdr *ethhdr,
+ struct batadv_nc_packet *nc_packet,
+ struct batadv_neigh_node *neigh_node)
+{
+ uint8_t tq_weighted_neigh, tq_weighted_coding;
+ struct sk_buff *skb_dest, *skb_src;
+ struct batadv_unicast_packet *packet1;
+ struct batadv_unicast_packet *packet2;
+ struct batadv_coded_packet *coded_packet;
+ struct batadv_neigh_node *neigh_tmp, *router_neigh;
+ struct batadv_neigh_node *router_coding = NULL;
+ uint8_t *first_source, *first_dest, *second_source, *second_dest;
+ __be32 packet_id1, packet_id2;
+ size_t count;
+ bool res = false;
+ int coding_len;
+ int unicast_size = sizeof(*packet1);
+ int coded_size = sizeof(*coded_packet);
+ int header_add = coded_size - unicast_size;
+
+ /* Both router lookups are paired with a free_ref at the out label */
+ router_neigh = batadv_orig_node_get_router(neigh_node->orig_node);
+ if (!router_neigh)
+ goto out;
+
+ neigh_tmp = nc_packet->neigh_node;
+ router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node);
+ if (!router_coding)
+ goto out;
+
+ tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg);
+ tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg);
+
+ /* Select one destination for the MAC-header dst-field based on
+ * weighted TQ-values.
+ */
+ if (tq_weighted_neigh >= tq_weighted_coding) {
+ /* Destination from nc_packet is selected for MAC-header */
+ first_dest = nc_packet->nc_path->next_hop;
+ first_source = nc_packet->nc_path->prev_hop;
+ second_dest = neigh_node->addr;
+ second_source = ethhdr->h_source;
+ packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data;
+ packet2 = (struct batadv_unicast_packet *)skb->data;
+ packet_id1 = nc_packet->packet_id;
+ packet_id2 = batadv_skb_crc32(skb,
+ skb->data + sizeof(*packet2));
+ } else {
+ /* Destination for skb is selected for MAC-header */
+ first_dest = neigh_node->addr;
+ first_source = ethhdr->h_source;
+ second_dest = nc_packet->nc_path->next_hop;
+ second_source = nc_packet->nc_path->prev_hop;
+ packet1 = (struct batadv_unicast_packet *)skb->data;
+ packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data;
+ packet_id1 = batadv_skb_crc32(skb,
+ skb->data + sizeof(*packet1));
+ packet_id2 = nc_packet->packet_id;
+ }
+
+ /* Instead of zero padding the smallest data buffer, we
+ * code into the largest.
+ */
+ if (skb->len <= nc_packet->skb->len) {
+ skb_dest = nc_packet->skb;
+ skb_src = skb;
+ } else {
+ skb_dest = skb;
+ skb_src = nc_packet->skb;
+ }
+
+ /* coding_len is used when decoding the shorter packet */
+ coding_len = skb_src->len - unicast_size;
+
+ if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0)
+ goto out;
+
+ /* Grow the header of skb_dest from unicast size to coded size */
+ skb_push(skb_dest, header_add);
+
+ coded_packet = (struct batadv_coded_packet *)skb_dest->data;
+ skb_reset_mac_header(skb_dest);
+
+ coded_packet->header.packet_type = BATADV_CODED;
+ coded_packet->header.version = BATADV_COMPAT_VERSION;
+ coded_packet->header.ttl = packet1->header.ttl;
+
+ /* Info about first unicast packet */
+ memcpy(coded_packet->first_source, first_source, ETH_ALEN);
+ memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN);
+ coded_packet->first_crc = packet_id1;
+ coded_packet->first_ttvn = packet1->ttvn;
+
+ /* Info about second unicast packet */
+ memcpy(coded_packet->second_dest, second_dest, ETH_ALEN);
+ memcpy(coded_packet->second_source, second_source, ETH_ALEN);
+ memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN);
+ coded_packet->second_crc = packet_id2;
+ coded_packet->second_ttl = packet2->header.ttl;
+ coded_packet->second_ttvn = packet2->ttvn;
+ coded_packet->coded_len = htons(coding_len);
+
+ /* This is where the magic happens: Code skb_src into skb_dest */
+ batadv_nc_memxor(skb_dest->data + coded_size,
+ skb_src->data + unicast_size, coding_len);
+
+ /* Update counters accordingly */
+ if (BATADV_SKB_CB(skb_src)->decoded &&
+ BATADV_SKB_CB(skb_dest)->decoded) {
+ /* Both packets are recoded */
+ count = skb_src->len + ETH_HLEN;
+ count += skb_dest->len + ETH_HLEN;
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2);
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count);
+ } else if (!BATADV_SKB_CB(skb_src)->decoded &&
+ !BATADV_SKB_CB(skb_dest)->decoded) {
+ /* Both packets are newly coded */
+ count = skb_src->len + ETH_HLEN;
+ count += skb_dest->len + ETH_HLEN;
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2);
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count);
+ } else if (BATADV_SKB_CB(skb_src)->decoded &&
+ !BATADV_SKB_CB(skb_dest)->decoded) {
+ /* skb_src recoded and skb_dest is newly coded */
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
+ skb_src->len + ETH_HLEN);
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
+ skb_dest->len + ETH_HLEN);
+ } else if (!BATADV_SKB_CB(skb_src)->decoded &&
+ BATADV_SKB_CB(skb_dest)->decoded) {
+ /* skb_src is newly coded and skb_dest is recoded */
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
+ skb_src->len + ETH_HLEN);
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
+ skb_dest->len + ETH_HLEN);
+ }
+
+ /* skb_src is now coded into skb_dest, so free it */
+ kfree_skb(skb_src);
+
+ /* avoid duplicate free of skb from nc_packet */
+ nc_packet->skb = NULL;
+ batadv_nc_packet_free(nc_packet);
+
+ /* Send the coded packet and return true */
+ batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest);
+ res = true;
+out:
+ if (router_neigh)
+ batadv_neigh_node_free_ref(router_neigh);
+ if (router_coding)
+ batadv_neigh_node_free_ref(router_coding);
+ return res;
+}
+
+/**
+ * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst.
+ * @skb: data skb to forward
+ * @dst: destination mac address of the other skb to code with
+ * @src: source mac address of skb
+ *
+ * A packet that reached us in network coded form may have been overheard by
+ * neighbors that were unable to decode it, and we cannot tell which neighbors
+ * succeeded. Such a packet may therefore only be re-coded when the source of
+ * the previously coded packet is involved: having encoded the packet itself,
+ * that node is guaranteed to hold the information needed for decoding.
+ *
+ * Returns true if coding of a decoded packet is allowed.
+ */
+static bool batadv_nc_skb_coding_possible(struct sk_buff *skb,
+					  uint8_t *dst, uint8_t *src)
+{
+	/* Packets that were never decoded can always be coded */
+	if (!BATADV_SKB_CB(skb)->decoded)
+		return true;
+
+	/* Decoded packets only when dst is the original encoder */
+	return batadv_compare_eth(dst, src);
+}
+
+/**
+ * batadv_nc_path_search - Find the coding path matching in_nc_node and
+ * out_nc_node to retrieve a buffered packet that can be used for coding.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @in_nc_node: pointer to skb next hop's neighbor nc node
+ * @out_nc_node: pointer to skb source's neighbor nc node
+ * @skb: data skb to forward (not referenced in this function)
+ * @eth_dst: next hop mac address of skb
+ *
+ * The search stops at the first nc_path with matching hops and a non-empty
+ * packet list, even if none of its packets is usable for coding. A packet
+ * that is returned has already been unlinked from the path's packet list.
+ *
+ * Returns the nc packet to code with if one was found, NULL otherwise.
+ */
+static struct batadv_nc_packet *
+batadv_nc_path_search(struct batadv_priv *bat_priv,
+ struct batadv_nc_node *in_nc_node,
+ struct batadv_nc_node *out_nc_node,
+ struct sk_buff *skb,
+ uint8_t *eth_dst)
+{
+ struct batadv_nc_path *nc_path, nc_path_key;
+ struct batadv_nc_packet *nc_packet_out = NULL;
+ struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
+ struct batadv_hashtable *hash = bat_priv->nc.coding_hash;
+ int idx;
+
+ if (!hash)
+ return NULL;
+
+ /* Create almost path key */
+ batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr,
+ out_nc_node->addr);
+ idx = batadv_nc_hash_choose(&nc_path_key, hash->size);
+
+ /* Check for coding opportunities in this nc_path */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) {
+ /* Skip other paths that landed in the same hash bucket */
+ if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr))
+ continue;
+
+ if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr))
+ continue;
+
+ spin_lock_bh(&nc_path->packet_list_lock);
+ if (list_empty(&nc_path->packet_list)) {
+ spin_unlock_bh(&nc_path->packet_list_lock);
+ continue;
+ }
+
+ list_for_each_entry_safe(nc_packet, nc_packet_tmp,
+ &nc_path->packet_list, list) {
+ if (!batadv_nc_skb_coding_possible(nc_packet->skb,
+ eth_dst,
+ in_nc_node->addr))
+ continue;
+
+ /* Coding opportunity is found! */
+ list_del(&nc_packet->list);
+ nc_packet_out = nc_packet;
+ break;
+ }
+
+ spin_unlock_bh(&nc_path->packet_list_lock);
+ /* Only the first matching non-empty path is examined */
+ break;
+ }
+ rcu_read_unlock();
+
+ return nc_packet_out;
+}
+
+/**
+ * batadv_nc_skb_src_search - Loops through the list of neighboring nodes of the
+ * skb's sender (may be equal to the originator).
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to forward
+ * @eth_dst: next hop mac address of skb
+ * @eth_src: source mac address of skb
+ * @in_nc_node: pointer to skb next hop's neighbor nc node
+ *
+ * Looks up the orig node for @eth_src and tries every entry of its
+ * out_coding_list, stopping at the first one that yields a buffered packet.
+ *
+ * Returns an nc packet if a suitable coding packet was found, NULL otherwise.
+ */
+static struct batadv_nc_packet *
+batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ uint8_t *eth_dst,
+ uint8_t *eth_src,
+ struct batadv_nc_node *in_nc_node)
+{
+ struct batadv_orig_node *orig_node;
+ struct batadv_nc_node *out_nc_node;
+ struct batadv_nc_packet *nc_packet = NULL;
+
+ /* Paired with batadv_orig_node_free_ref() before returning */
+ orig_node = batadv_orig_hash_find(bat_priv, eth_src);
+ if (!orig_node)
+ return NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(out_nc_node,
+ &orig_node->out_coding_list, list) {
+ /* Check if the skb is decoded and if recoding is possible */
+ if (!batadv_nc_skb_coding_possible(skb,
+ out_nc_node->addr, eth_src))
+ continue;
+
+ /* Search for an opportunity in this nc_path */
+ nc_packet = batadv_nc_path_search(bat_priv, in_nc_node,
+ out_nc_node, skb, eth_dst);
+ if (nc_packet)
+ break;
+ }
+ rcu_read_unlock();
+
+ batadv_orig_node_free_ref(orig_node);
+ return nc_packet;
+}
+
+/**
+ * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the
+ *  unicast skb before it is stored for use in later decoding
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to store
+ * @eth_dst_new: new destination mac address of skb
+ *
+ * Works on a private copy of @skb; the caller's skb is left untouched.
+ */
+static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
+					      struct sk_buff *skb,
+					      uint8_t *eth_dst_new)
+{
+	struct sk_buff *copy;
+	struct ethhdr *mac;
+
+	/* The mac header is rewritten below, so work on a copy */
+	copy = pskb_copy(skb, GFP_ATOMIC);
+	if (!copy)
+		return;
+
+	/* Pretend we sent the packet uncoded: old dst becomes the src */
+	mac = (struct ethhdr *)skb_mac_header(copy);
+	memcpy(mac->h_source, mac->h_dest, ETH_ALEN);
+	memcpy(mac->h_dest, eth_dst_new, ETH_ALEN);
+
+	/* Set data pointer to MAC header to mimic packets from our tx path */
+	skb_push(copy, ETH_HLEN);
+
+	/* Hand it to the decoding pool, which keeps its own clone ... */
+	batadv_nc_skb_store_for_decoding(bat_priv, copy);
+
+	/* ... so our copy is no longer needed */
+	kfree_skb(copy);
+}
+
+/**
+ * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst.
+ * @skb: data skb to forward
+ * @neigh_node: next hop to forward packet to
+ * @ethhdr: pointer to the ethernet header inside the skb
+ *
+ * Loops through list of neighboring nodes the next hop has a good connection to
+ * (receives OGMs with a sufficient quality). We need to find a neighbor of our
+ * next hop that potentially sent a packet which our next hop also received
+ * (overheard) and has stored for later decoding.
+ *
+ * If a buffered packet is found but coding fails, the buffered packet is
+ * handed to batadv_nc_send_packet() and the skb is left to the caller.
+ *
+ * Returns true if the skb was consumed (encoded packet sent) or false otherwise
+ */
+static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
+ struct batadv_neigh_node *neigh_node,
+ struct ethhdr *ethhdr)
+{
+ struct net_device *netdev = neigh_node->if_incoming->soft_iface;
+ struct batadv_priv *bat_priv = netdev_priv(netdev);
+ struct batadv_orig_node *orig_node = neigh_node->orig_node;
+ struct batadv_nc_node *nc_node;
+ struct batadv_nc_packet *nc_packet = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) {
+ /* Search for coding opportunity with this in_nc_node */
+ nc_packet = batadv_nc_skb_src_search(bat_priv, skb,
+ neigh_node->addr,
+ ethhdr->h_source, nc_node);
+
+ /* Opportunity was found, so stop searching */
+ if (nc_packet)
+ break;
+ }
+ rcu_read_unlock();
+
+ if (!nc_packet)
+ return false;
+
+ /* Save packets for later decoding */
+ batadv_nc_skb_store_before_coding(bat_priv, skb,
+ neigh_node->addr);
+ batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb,
+ nc_packet->neigh_node->addr);
+
+ /* Code and send packets */
+ if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet,
+ neigh_node))
+ return true;
+
+ /* out of mem ? Coding failed - we have to free the buffered packet
+ * to avoid memleaks. The skb passed as argument will be dealt with
+ * by the calling function.
+ */
+ batadv_nc_send_packet(nc_packet);
+ return false;
+}
+
+/**
+ * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding
+ * @skb: skb to add to path
+ * @nc_path: path to add skb to
+ * @neigh_node: next hop to forward packet to
+ * @packet_id: checksum to identify packet
+ *
+ * Wraps @skb in a newly allocated nc_packet and appends it to the packet list
+ * of @nc_path under the list lock.
+ *
+ * Returns true if the packet was buffered or false in case of an error.
+ */
+static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
+				      struct batadv_nc_path *nc_path,
+				      struct batadv_neigh_node *neigh_node,
+				      __be32 packet_id)
+{
+	struct batadv_nc_packet *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+
+	if (!entry)
+		return false;
+
+	/* Fill in the bookkeeping data for the buffered skb */
+	entry->skb = skb;
+	entry->nc_path = nc_path;
+	entry->neigh_node = neigh_node;
+	entry->packet_id = packet_id;
+	entry->timestamp = jiffies;
+
+	/* Queue it at the tail of the path's packet list */
+	spin_lock_bh(&nc_path->packet_list_lock);
+	list_add_tail(&entry->list, &nc_path->packet_list);
+	spin_unlock_bh(&nc_path->packet_list_lock);
+
+	return true;
+}
+
+/**
+ * batadv_nc_skb_forward - try to code a packet or add it to the coding packet
+ *  buffer
+ * @skb: data skb to forward
+ * @neigh_node: next hop to forward packet to
+ * @ethhdr: pointer to the ethernet header inside the skb
+ *
+ * Only unicast packets are handled and only while network coding is enabled.
+ * If no coding opportunity exists the skb is buffered on the nc_path for its
+ * source / next hop pair.
+ *
+ * Returns true if the skb was consumed (coded and sent, or buffered) or false
+ * otherwise
+ */
+bool batadv_nc_skb_forward(struct sk_buff *skb,
+			   struct batadv_neigh_node *neigh_node,
+			   struct ethhdr *ethhdr)
+{
+	const struct net_device *netdev = neigh_node->if_incoming->soft_iface;
+	struct batadv_priv *bat_priv = netdev_priv(netdev);
+	struct batadv_unicast_packet *unicast;
+	struct batadv_nc_path *path;
+	__be32 crc;
+	u8 *hdr;
+
+	/* Network coding disabled: leave the packet to the caller */
+	if (!atomic_read(&bat_priv->network_coding))
+		return false;
+
+	/* Anything but unicast is not our business */
+	hdr = skb_network_header(skb);
+	unicast = (struct batadv_unicast_packet *)hdr;
+	if (unicast->header.packet_type != BATADV_UNICAST)
+		return false;
+
+	/* A coding opportunity sends the skb right away */
+	if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr))
+		return true;
+
+	/* Otherwise buffer it on the path of this src-dst pair */
+	path = batadv_nc_get_path(bat_priv,
+				  bat_priv->nc.coding_hash,
+				  ethhdr->h_source,
+				  neigh_node->addr);
+	if (!path)
+		return false;
+
+	crc = batadv_skb_crc32(skb, hdr + sizeof(*unicast));
+	if (batadv_nc_skb_add_to_path(skb, path, neigh_node, crc))
+		return true;
+
+	/* Buffering failed: drop the path reference, packet is not consumed */
+	batadv_nc_path_free_ref(path);
+	return false;
+}
+
+/**
+ * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used
+ * when decoding coded packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to store
+ *
+ * Only unicast packets are stored and only while network coding is enabled.
+ * The caller keeps its own reference to @skb; the clone made here is what
+ * ends up in the decoding buffer.
+ */
+void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ struct batadv_unicast_packet *packet;
+ struct batadv_nc_path *nc_path;
+ struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ __be32 packet_id;
+ u8 *payload;
+
+ /* Check if network coding is enabled */
+ if (!atomic_read(&bat_priv->network_coding))
+ goto out;
+
+ /* Check for supported packet type */
+ payload = skb_network_header(skb);
+ packet = (struct batadv_unicast_packet *)payload;
+ if (packet->header.packet_type != BATADV_UNICAST)
+ goto out;
+
+ /* Find existing nc_path or create a new */
+ nc_path = batadv_nc_get_path(bat_priv,
+ bat_priv->nc.decoding_hash,
+ ethhdr->h_source,
+ ethhdr->h_dest);
+
+ if (!nc_path)
+ goto out;
+
+ /* Clone skb and adjust skb->data to point at batman header */
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ goto free_nc_path;
+
+ if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+ goto free_skb;
+
+ if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN)))
+ goto free_skb;
+
+ /* Add skb to nc_path */
+ /* NOTE(review): payload was taken from the original skb before the
+ * clone and pull above - confirm it is still valid for the clone here
+ */
+ packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet));
+ if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id))
+ goto free_skb;
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER);
+ return;
+
+free_skb:
+ kfree_skb(skb);
+free_nc_path:
+ batadv_nc_path_free_ref(nc_path);
+out:
+ return;
+}
+
+/**
+ * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet
+ *  should be saved in the decoding buffer and, if so, store it there
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast skb to store
+ *
+ * Packets addressed to one of our own mac addresses are not stored.
+ */
+void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
+					 struct sk_buff *skb)
+{
+	struct ethhdr *mac = (struct ethhdr *)skb_mac_header(skb);
+
+	if (!batadv_is_my_mac(bat_priv, mac->h_dest)) {
+		/* Set data pointer to MAC header to mimic packets from our
+		 * tx path
+		 */
+		skb_push(skb, ETH_HLEN);
+
+		batadv_nc_skb_store_for_decoding(bat_priv, skb);
+	}
+}
+
+/**
+ * batadv_nc_skb_decode_packet - decode given skb using the decode data stored
+ *  in nc_packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast skb to decode
+ * @nc_packet: decode data needed to decode the skb
+ *
+ * The coded header of @skb is shrunk to a unicast header and the payload is
+ * XORed with the payload of the buffered packet. The coded length is taken
+ * from the received header and is therefore validated against the length of
+ * both buffers before any byte is read or written.
+ *
+ * @nc_packet is freed on success only. On failure it is left untouched for
+ * the caller to release.
+ *
+ * Returns pointer to decoded unicast packet if the packet was decoded or NULL
+ * in case of an error.
+ */
+static struct batadv_unicast_packet *
+batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			    struct batadv_nc_packet *nc_packet)
+{
+	const int h_size = sizeof(struct batadv_unicast_packet);
+	const int h_diff = sizeof(struct batadv_coded_packet) - h_size;
+	struct batadv_unicast_packet *unicast_packet;
+	struct batadv_coded_packet coded_packet_tmp;
+	struct ethhdr *ethhdr, ethhdr_tmp;
+	uint8_t *orig_dest, ttl, ttvn;
+	unsigned int coding_len, decoded_len;
+	int err;
+
+	/* Save headers temporarily: skb_cow() may relocate the skb data */
+	memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp));
+	memcpy(&ethhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp));
+
+	if (skb_cow(skb, 0) < 0)
+		return NULL;
+
+	if (unlikely(!skb_pull_rcsum(skb, h_diff)))
+		return NULL;
+
+	/* Data points to batman header, so set mac header 14 bytes before
+	 * and network to data
+	 */
+	skb_set_mac_header(skb, -ETH_HLEN);
+	skb_reset_network_header(skb);
+
+	/* Reconstruct original mac header */
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
+
+	/* Select the correct unicast header information based on the location
+	 * of our mac address in the coded_packet header
+	 */
+	if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) {
+		/* If we are the second destination the packet was overheard,
+		 * so the Ethernet address must be copied to h_dest and
+		 * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST
+		 */
+		memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN);
+		skb->pkt_type = PACKET_HOST;
+
+		orig_dest = coded_packet_tmp.second_orig_dest;
+		ttl = coded_packet_tmp.second_ttl;
+		ttvn = coded_packet_tmp.second_ttvn;
+	} else {
+		orig_dest = coded_packet_tmp.first_orig_dest;
+		ttl = coded_packet_tmp.header.ttl;
+		ttvn = coded_packet_tmp.first_ttvn;
+	}
+
+	coding_len = ntohs(coded_packet_tmp.coded_len);
+	decoded_len = coding_len + h_size;
+
+	/* The XOR below starts h_size bytes into both buffers and covers
+	 * coding_len bytes, so header plus coded length has to fit into the
+	 * received skb as well as into the buffered one.
+	 */
+	if (decoded_len > skb->len || decoded_len > nc_packet->skb->len)
+		return NULL;
+
+	/* Here the magic is reversed:
+	 *   extract the missing packet from the received coded packet
+	 */
+	batadv_nc_memxor(skb->data + h_size,
+			 nc_packet->skb->data + h_size,
+			 coding_len);
+
+	/* Resize decoded skb if decoded with larger packet */
+	if (nc_packet->skb->len > decoded_len) {
+		err = pskb_trim_rcsum(skb, decoded_len);
+		if (err)
+			return NULL;
+	}
+
+	/* Create decoded unicast packet */
+	unicast_packet = (struct batadv_unicast_packet *)skb->data;
+	unicast_packet->header.packet_type = BATADV_UNICAST;
+	unicast_packet->header.version = BATADV_COMPAT_VERSION;
+	unicast_packet->header.ttl = ttl;
+	memcpy(unicast_packet->dest, orig_dest, ETH_ALEN);
+	unicast_packet->ttvn = ttvn;
+
+	batadv_nc_packet_free(nc_packet);
+	return unicast_packet;
+}
+
+/**
+ * batadv_nc_find_decoding_packet - search through buffered decoding data to
+ * find the data needed to decode the coded packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: pointer to the ethernet header inside the coded packet
+ * @coded: coded packet we try to find decode data for
+ *
+ * A packet that is returned has already been unlinked from its path's packet
+ * list.
+ *
+ * Returns pointer to nc packet if the needed data was found or NULL otherwise.
+ */
+static struct batadv_nc_packet *
+batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
+ struct ethhdr *ethhdr,
+ struct batadv_coded_packet *coded)
+{
+ struct batadv_hashtable *hash = bat_priv->nc.decoding_hash;
+ struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL;
+ struct batadv_nc_path *nc_path, nc_path_key;
+ uint8_t *dest, *source;
+ __be32 packet_id;
+ int index;
+
+ if (!hash)
+ return NULL;
+
+ /* Select the correct packet id based on the location of our mac-addr */
+ /* The sender of the coded frame is used as the dest half of the key */
+ dest = ethhdr->h_source;
+ if (!batadv_is_my_mac(bat_priv, coded->second_dest)) {
+ /* We are not the second destination: look up the second packet */
+ source = coded->second_source;
+ packet_id = coded->second_crc;
+ } else {
+ /* We are the second destination: look up the first packet */
+ source = coded->first_source;
+ packet_id = coded->first_crc;
+ }
+
+ batadv_nc_hash_key_gen(&nc_path_key, source, dest);
+ index = batadv_nc_hash_choose(&nc_path_key, hash->size);
+
+ /* Search for matching coding path */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) {
+ /* Find matching nc_packet */
+ /* Every path in the bucket is scanned; only packet_id is compared */
+ spin_lock_bh(&nc_path->packet_list_lock);
+ list_for_each_entry(tmp_nc_packet,
+ &nc_path->packet_list, list) {
+ if (packet_id == tmp_nc_packet->packet_id) {
+ list_del(&tmp_nc_packet->list);
+
+ nc_packet = tmp_nc_packet;
+ break;
+ }
+ }
+ spin_unlock_bh(&nc_path->packet_list_lock);
+
+ if (nc_packet)
+ break;
+ }
+ rcu_read_unlock();
+
+ if (!nc_packet)
+ batadv_dbg(BATADV_DBG_NC, bat_priv,
+ "No decoding packet found for %u\n", packet_id);
+
+ return nc_packet;
+}
+
+/**
+ * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the
+ * resulting unicast packet
+ * @skb: incoming coded packet
+ * @recv_if: pointer to interface this packet was received on
+ */
+static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_unicast_packet *unicast_packet;
+ struct batadv_coded_packet *coded_packet;
+ struct batadv_nc_packet *nc_packet;
+ struct ethhdr *ethhdr;
+ int hdr_size = sizeof(*coded_packet);
+
+ /* Check if network coding is enabled */
+ if (!atomic_read(&bat_priv->network_coding))
+ return NET_RX_DROP;
+
+ /* Make sure we can access (and remove) header */
+ if (unlikely(!pskb_may_pull(skb, hdr_size)))
+ return NET_RX_DROP;
+
+ coded_packet = (struct batadv_coded_packet *)skb->data;
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+ /* Verify frame is destined for us */
+ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
+ !batadv_is_my_mac(bat_priv, coded_packet->second_dest))
+ return NET_RX_DROP;
+
+ /* Update stat counter */
+ if (batadv_is_my_mac(bat_priv, coded_packet->second_dest))
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED);
+
+ nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr,
+ coded_packet);
+ if (!nc_packet) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
+ return NET_RX_DROP;
+ }
+
+ /* Make both skbs linear, because decoding accesses the entire buffer */
+ if (skb_linearize(skb) < 0)
+ goto free_nc_packet;
+
+ if (skb_linearize(nc_packet->skb) < 0)
+ goto free_nc_packet;
+
+ /* Decode the packet */
+ unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet);
+ if (!unicast_packet) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
+ goto free_nc_packet;
+ }
+
+ /* Mark packet as decoded to do correct recoding when forwarding */
+ BATADV_SKB_CB(skb)->decoded = true;
+ batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE);
+ batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES,
+ skb->len + ETH_HLEN);
+ return batadv_recv_unicast_packet(skb, recv_if);
+
+free_nc_packet:
+ batadv_nc_packet_free(nc_packet);
+ return NET_RX_DROP;
+}
+
+/**
+ * batadv_nc_free - clean up network coding memory
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_nc_free(struct batadv_priv *bat_priv)
+{
+ batadv_recv_handler_unregister(BATADV_CODED);
+ cancel_delayed_work_sync(&bat_priv->nc.work);
+
+ batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
+ batadv_hash_destroy(bat_priv->nc.coding_hash);
+ batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL);
+ batadv_hash_destroy(bat_priv->nc.decoding_hash);
+}
+
+/**
+ * batadv_nc_nodes_seq_print_text - print the nc node information
+ * @seq: seq file to print on
+ * @offset: not used
+ */
+int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
+{
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct batadv_hard_iface *primary_if;
+ struct hlist_head *head;
+ struct batadv_orig_node *orig_node;
+ struct batadv_nc_node *nc_node;
+ int i;
+
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
+ goto out;
+
+ /* Traverse list of originators */
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ /* For each orig_node in this bin */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ seq_printf(seq, "Node: %pM\n", orig_node->orig);
+
+ seq_puts(seq, " Ingoing: ");
+ /* For each in_nc_node to this orig_node */
+ list_for_each_entry_rcu(nc_node,
+ &orig_node->in_coding_list,
+ list)
+ seq_printf(seq, "%pM ",
+ nc_node->addr);
+ seq_puts(seq, "\n");
+
+ seq_puts(seq, " Outgoing: ");
+ /* For each out_nc_node to this orig_node */
+ list_for_each_entry_rcu(nc_node,
+ &orig_node->out_coding_list,
+ list)
+ seq_printf(seq, "%pM ",
+ nc_node->addr);
+ seq_puts(seq, "\n\n");
+ }
+ rcu_read_unlock();
+ }
+
+out:
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ return 0;
+}
+
+/**
+ * batadv_nc_init_debugfs - create nc folder and related files in debugfs
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+{
+ struct dentry *nc_dir, *file;
+
+ nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir);
+ if (!nc_dir)
+ goto out;
+
+ file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir,
+ &bat_priv->nc.min_tq);
+ if (!file)
+ goto out;
+
+ file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir,
+ &bat_priv->nc.max_fwd_delay);
+ if (!file)
+ goto out;
+
+ file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir,
+ &bat_priv->nc.max_buffer_time);
+ if (!file)
+ goto out;
+
+ return 0;
+
+out:
+ return -ENOMEM;
+}
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
new file mode 100644
index 000000000000..4fa6d0caddbd
--- /dev/null
+++ b/net/batman-adv/network-coding.h
@@ -0,0 +1,123 @@
+/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors:
+ *
+ * Martin Hundebøll, Jeppe Ledet-Pedersen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_
+#define _NET_BATMAN_ADV_NETWORK_CODING_H_
+
+#ifdef CONFIG_BATMAN_ADV_NC
+
+int batadv_nc_init(struct batadv_priv *bat_priv);
+void batadv_nc_free(struct batadv_priv *bat_priv);
+void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ struct batadv_orig_node *orig_neigh_node,
+ struct batadv_ogm_packet *ogm_packet,
+ int is_single_hop_neigh);
+void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ bool (*to_purge)(struct batadv_priv *,
+ struct batadv_nc_node *));
+void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv);
+void batadv_nc_init_orig(struct batadv_orig_node *orig_node);
+bool batadv_nc_skb_forward(struct sk_buff *skb,
+ struct batadv_neigh_node *neigh_node,
+ struct ethhdr *ethhdr);
+void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
+
+#else /* ifdef CONFIG_BATMAN_ADV_NC */
+
+static inline int batadv_nc_init(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_nc_free(struct batadv_priv *bat_priv)
+{
+ return;
+}
+
+static inline void
+batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ struct batadv_orig_node *orig_neigh_node,
+ struct batadv_ogm_packet *ogm_packet,
+ int is_single_hop_neigh)
+{
+ return;
+}
+
+static inline void
+batadv_nc_purge_orig(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ bool (*to_purge)(struct batadv_priv *,
+ struct batadv_nc_node *))
+{
+ return;
+}
+
+static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
+{
+ return;
+}
+
+static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
+{
+ return;
+}
+
+static inline bool batadv_nc_skb_forward(struct sk_buff *skb,
+ struct batadv_neigh_node *neigh_node,
+ struct ethhdr *ethhdr)
+{
+ return false;
+}
+
+static inline void
+batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return;
+}
+
+static inline void
+batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return;
+}
+
+static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq,
+ void *offset)
+{
+ return 0;
+}
+
+static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+#endif /* ifdef CONFIG_BATMAN_ADV_NC */
+
+#endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 96fb80b724dc..fad1a2093e15 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,6 +28,7 @@
#include "unicast.h"
#include "soft-interface.h"
#include "bridge_loop_avoidance.h"
+#include "network-coding.h"
/* hash class keys */
static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -142,6 +143,9 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
spin_unlock_bh(&orig_node->neigh_list_lock);
+ /* Free nc_nodes */
+ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
+
batadv_frag_list_free(&orig_node->frag_list);
batadv_tt_global_del_orig(orig_node->bat_priv, orig_node,
"originator timed out");
@@ -152,12 +156,28 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
kfree(orig_node);
}
+/**
+ * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly
+ * schedule an rcu callback for freeing it
+ * @orig_node: the orig node to free
+ */
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
{
if (atomic_dec_and_test(&orig_node->refcount))
call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
+/**
+ * batadv_orig_node_free_ref_now - decrement the orig node refcounter and
+ * possibly free it (without rcu callback)
+ * @orig_node: the orig node to free
+ */
+void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node)
+{
+ if (atomic_dec_and_test(&orig_node->refcount))
+ batadv_orig_node_free_rcu(&orig_node->rcu);
+}
+
void batadv_originator_free(struct batadv_priv *bat_priv)
{
struct batadv_hashtable *hash = bat_priv->orig_hash;
@@ -219,6 +239,8 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
spin_lock_init(&orig_node->neigh_list_lock);
spin_lock_init(&orig_node->tt_buff_lock);
+ batadv_nc_init_orig(orig_node);
+
/* extra reference for return */
atomic_set(&orig_node->refcount, 2);
@@ -459,7 +481,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
neigh_node_tmp->tq_avg);
}
- seq_printf(seq, "\n");
+ seq_puts(seq, "\n");
batman_count++;
next:
@@ -469,7 +491,7 @@ next:
}
if (batman_count == 0)
- seq_printf(seq, "No batman nodes in range ...\n");
+ seq_puts(seq, "No batman nodes in range ...\n");
out:
if (primary_if)
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 7df48fa7669d..734e5a3d8a5b 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -26,6 +26,7 @@ int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
+void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
const uint8_t *addr);
struct batadv_neigh_node *
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index ed0aa89bbf8b..a51ccfc39da4 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -30,6 +30,7 @@ enum batadv_packettype {
BATADV_TT_QUERY = 0x07,
BATADV_ROAM_ADV = 0x08,
BATADV_UNICAST_4ADDR = 0x09,
+ BATADV_CODED = 0x0a,
};
/**
@@ -278,4 +279,36 @@ struct batadv_tt_change {
uint8_t addr[ETH_ALEN];
} __packed;
+/**
+ * struct batadv_coded_packet - network coded packet
+ * @header: common batman packet header and ttl of first included packet
+ * @reserved: Align following fields to 2-byte boundaries
+ * @first_source: original source of first included packet
+ * @first_orig_dest: original destination of first included packet
+ * @first_crc: checksum of first included packet
+ * @first_ttvn: tt-version number of first included packet
+ * @second_ttl: ttl of second packet
+ * @second_dest: second receiver of this coded packet
+ * @second_source: original source of second included packet
+ * @second_orig_dest: original destination of second included packet
+ * @second_crc: checksum of second included packet
+ * @second_ttvn: tt version number of second included packet
+ * @coded_len: length of network coded part of the payload
+ */
+struct batadv_coded_packet {
+ struct batadv_header header;
+ uint8_t first_ttvn;
+ /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */
+ uint8_t first_source[ETH_ALEN];
+ uint8_t first_orig_dest[ETH_ALEN];
+ __be32 first_crc;
+ uint8_t second_ttl;
+ uint8_t second_ttvn;
+ uint8_t second_dest[ETH_ALEN];
+ uint8_t second_source[ETH_ALEN];
+ uint8_t second_orig_dest[ETH_ALEN];
+ __be32 second_crc;
+ __be16 coded_len;
+};
+
#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 319f2906c71a..b27a4d792d15 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -29,6 +29,7 @@
#include "unicast.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
+#include "network-coding.h"
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -548,6 +549,17 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
return router;
}
+/**
+ * batadv_check_unicast_packet - Check for malformed unicast packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: packet to check
+ * @hdr_size: size of header to pull
+ *
+ * Check for short header and bad addresses in given packet. Returns negative
+ * value when check fails and 0 otherwise. The negative value depends on the
+ * reason: -ENODATA for bad header, -EBADR for broadcast destination or source,
+ * and -EREMOTE for non-local (other host) destination.
+ */
static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
{
@@ -555,21 +567,21 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
/* drop packet if it has not necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
- return -1;
+ return -ENODATA;
ethhdr = (struct ethhdr *)skb_mac_header(skb);
/* packet with unicast indication but broadcast recipient */
if (is_broadcast_ether_addr(ethhdr->h_dest))
- return -1;
+ return -EBADR;
/* packet with broadcast sender address */
if (is_broadcast_ether_addr(ethhdr->h_source))
- return -1;
+ return -EBADR;
/* not for me */
if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
- return -1;
+ return -EREMOTE;
return 0;
}
@@ -852,15 +864,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
/* decrement ttl */
unicast_packet->header.ttl--;
- /* Update stats counter */
- batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
- batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
- skb->len + ETH_HLEN);
-
- /* route it */
- if (batadv_send_skb_to_orig(skb, orig_node, recv_if))
+ /* network code packet if possible */
+ if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) {
+ ret = NET_RX_SUCCESS;
+ } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) {
ret = NET_RX_SUCCESS;
+ /* Update stats counter */
+ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
+ batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
+ skb->len + ETH_HLEN);
+ }
+
out:
if (neigh_node)
batadv_neigh_node_free_ref(neigh_node);
@@ -924,7 +939,7 @@ out:
}
static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
- struct sk_buff *skb) {
+ struct sk_buff *skb, int hdr_len) {
uint8_t curr_ttvn, old_ttvn;
struct batadv_orig_node *orig_node;
struct ethhdr *ethhdr;
@@ -933,7 +948,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
int is_old_ttvn;
/* check if there is enough data before accessing it */
- if (pskb_may_pull(skb, sizeof(*unicast_packet) + ETH_HLEN) < 0)
+ if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0)
return 0;
/* create a copy of the skb (in case of for re-routing) to modify it. */
@@ -941,7 +956,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
return 0;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
- ethhdr = (struct ethhdr *)(skb->data + sizeof(*unicast_packet));
+ ethhdr = (struct ethhdr *)(skb->data + hdr_len);
/* check if the destination client was served by this node and it is now
* roaming. In this case, it means that the node has got a ROAM_ADV
@@ -1035,7 +1050,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_unicast_4addr_packet *unicast_4addr_packet;
uint8_t *orig_addr;
struct batadv_orig_node *orig_node = NULL;
- int hdr_size = sizeof(*unicast_packet);
+ int check, hdr_size = sizeof(*unicast_packet);
bool is4addr;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -1046,10 +1061,18 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
if (is4addr)
hdr_size = sizeof(*unicast_4addr_packet);
- if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
- return NET_RX_DROP;
+ /* function returns -EREMOTE for promiscuous packets */
+ check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
+
+ /* Even though the packet is not for us, we might save it to use for
+ * decoding a later received coded packet
+ */
+ if (check == -EREMOTE)
+ batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);
- if (!batadv_check_unicast_ttvn(bat_priv, skb))
+ if (check < 0)
+ return NET_RX_DROP;
+ if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
return NET_RX_DROP;
/* packet for me */
@@ -1093,7 +1116,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
return NET_RX_DROP;
- if (!batadv_check_unicast_ttvn(bat_priv, skb))
+ if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
return NET_RX_DROP;
unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index a67cffde37ae..263cfd1ccee7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -27,6 +27,7 @@
#include "vis.h"
#include "gateway_common.h"
#include "originator.h"
+#include "network-coding.h"
#include <linux/if_ether.h>
@@ -39,6 +40,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
const uint8_t *dst_addr)
{
+ struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct ethhdr *ethhdr;
if (hard_iface->if_status != BATADV_IF_ACTIVE)
@@ -70,6 +72,9 @@ int batadv_send_skb_packet(struct sk_buff *skb,
skb->dev = hard_iface->net_dev;
+ /* Save a clone of the skb to use when decoding coded packets */
+ batadv_nc_skb_store_for_decoding(bat_priv, skb);
+
/* dev_queue_xmit() returns a negative result on error. However on
* congestion and traffic shaping, it drops and returns NET_XMIT_DROP
* (which is > 0). This will not be treated as an error.
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 2711e870f557..819dfb006cdf 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -37,6 +37,7 @@
#include <linux/if_ether.h>
#include "unicast.h"
#include "bridge_loop_avoidance.h"
+#include "network-coding.h"
static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
@@ -401,55 +402,6 @@ static void batadv_set_lockdep_class(struct net_device *dev)
}
/**
- * batadv_softif_init - Late stage initialization of soft interface
- * @dev: registered network device to modify
- *
- * Returns error code on failures
- */
-static int batadv_softif_init(struct net_device *dev)
-{
- batadv_set_lockdep_class(dev);
-
- return 0;
-}
-
-static const struct net_device_ops batadv_netdev_ops = {
- .ndo_init = batadv_softif_init,
- .ndo_open = batadv_interface_open,
- .ndo_stop = batadv_interface_release,
- .ndo_get_stats = batadv_interface_stats,
- .ndo_set_mac_address = batadv_interface_set_mac_addr,
- .ndo_change_mtu = batadv_interface_change_mtu,
- .ndo_start_xmit = batadv_interface_tx,
- .ndo_validate_addr = eth_validate_addr
-};
-
-static void batadv_interface_setup(struct net_device *dev)
-{
- struct batadv_priv *priv = netdev_priv(dev);
-
- ether_setup(dev);
-
- dev->netdev_ops = &batadv_netdev_ops;
- dev->destructor = free_netdev;
- dev->tx_queue_len = 0;
-
- /* can't call min_mtu, because the needed variables
- * have not been initialized yet
- */
- dev->mtu = ETH_DATA_LEN;
- /* reserve more space in the skbuff for our header */
- dev->hard_header_len = BATADV_HEADER_LEN;
-
- /* generate random address */
- eth_hw_addr_random(dev);
-
- SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
-
- memset(priv, 0, sizeof(*priv));
-}
-
-/**
* batadv_softif_destroy_finish - cleans up the remains of a softif
* @work: work queue item
*
@@ -465,7 +417,6 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
cleanup_work);
soft_iface = bat_priv->soft_iface;
- batadv_debugfs_del_meshif(soft_iface);
batadv_sysfs_del_meshif(soft_iface);
rtnl_lock();
@@ -473,21 +424,22 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
rtnl_unlock();
}
-struct net_device *batadv_softif_create(const char *name)
+/**
+ * batadv_softif_init_late - late stage initialization of soft interface
+ * @dev: registered network device to modify
+ *
+ * Returns error code on failures
+ */
+static int batadv_softif_init_late(struct net_device *dev)
{
- struct net_device *soft_iface;
struct batadv_priv *bat_priv;
int ret;
size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM;
- soft_iface = alloc_netdev(sizeof(*bat_priv), name,
- batadv_interface_setup);
-
- if (!soft_iface)
- goto out;
+ batadv_set_lockdep_class(dev);
- bat_priv = netdev_priv(soft_iface);
- bat_priv->soft_iface = soft_iface;
+ bat_priv = netdev_priv(dev);
+ bat_priv->soft_iface = dev;
INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
/* batadv_interface_stats() needs to be available as soon as
@@ -495,14 +447,7 @@ struct net_device *batadv_softif_create(const char *name)
*/
bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));
if (!bat_priv->bat_counters)
- goto free_soft_iface;
-
- ret = register_netdevice(soft_iface);
- if (ret < 0) {
- pr_err("Unable to register the batman interface '%s': %i\n",
- name, ret);
- goto free_bat_counters;
- }
+ return -ENOMEM;
atomic_set(&bat_priv->aggregated_ogms, 1);
atomic_set(&bat_priv->bonding, 0);
@@ -540,49 +485,197 @@ struct net_device *batadv_softif_create(const char *name)
bat_priv->primary_if = NULL;
bat_priv->num_ifaces = 0;
- ret = batadv_algo_select(bat_priv, batadv_routing_algo);
- if (ret < 0)
- goto unreg_soft_iface;
+ batadv_nc_init_bat_priv(bat_priv);
- ret = batadv_sysfs_add_meshif(soft_iface);
+ ret = batadv_algo_select(bat_priv, batadv_routing_algo);
if (ret < 0)
- goto unreg_soft_iface;
+ goto free_bat_counters;
- ret = batadv_debugfs_add_meshif(soft_iface);
+ ret = batadv_debugfs_add_meshif(dev);
if (ret < 0)
- goto unreg_sysfs;
+ goto free_bat_counters;
- ret = batadv_mesh_init(soft_iface);
+ ret = batadv_mesh_init(dev);
if (ret < 0)
goto unreg_debugfs;
- return soft_iface;
+ return 0;
unreg_debugfs:
- batadv_debugfs_del_meshif(soft_iface);
-unreg_sysfs:
- batadv_sysfs_del_meshif(soft_iface);
-unreg_soft_iface:
- free_percpu(bat_priv->bat_counters);
- unregister_netdevice(soft_iface);
- return NULL;
-
+ batadv_debugfs_del_meshif(dev);
free_bat_counters:
free_percpu(bat_priv->bat_counters);
-free_soft_iface:
- free_netdev(soft_iface);
+ bat_priv->bat_counters = NULL;
+
+ return ret;
+}
+
+/**
+ * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
+ * @dev: batadv_soft_interface used as master interface
+ * @slave_dev: net_device which should become the slave interface
+ *
+ * Return 0 if successful or error otherwise.
+ */
+static int batadv_softif_slave_add(struct net_device *dev,
+ struct net_device *slave_dev)
+{
+ struct batadv_hard_iface *hard_iface;
+ int ret = -EINVAL;
+
+ hard_iface = batadv_hardif_get_by_netdev(slave_dev);
+ if (!hard_iface || hard_iface->soft_iface != NULL)
+ goto out;
+
+ ret = batadv_hardif_enable_interface(hard_iface, dev->name);
+
out:
- return NULL;
+ if (hard_iface)
+ batadv_hardif_free_ref(hard_iface);
+ return ret;
}
-void batadv_softif_destroy(struct net_device *soft_iface)
+/**
+ * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface
+ * @dev: batadv_soft_interface used as master interface
+ * @slave_dev: net_device which should be removed from the master interface
+ *
+ * Return 0 if successful or error otherwise.
+ */
+static int batadv_softif_slave_del(struct net_device *dev,
+ struct net_device *slave_dev)
+{
+ struct batadv_hard_iface *hard_iface;
+ int ret = -EINVAL;
+
+ hard_iface = batadv_hardif_get_by_netdev(slave_dev);
+
+ if (!hard_iface || hard_iface->soft_iface != dev)
+ goto out;
+
+ batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP);
+ ret = 0;
+
+out:
+ if (hard_iface)
+ batadv_hardif_free_ref(hard_iface);
+ return ret;
+}
+
+static const struct net_device_ops batadv_netdev_ops = {
+ .ndo_init = batadv_softif_init_late,
+ .ndo_open = batadv_interface_open,
+ .ndo_stop = batadv_interface_release,
+ .ndo_get_stats = batadv_interface_stats,
+ .ndo_set_mac_address = batadv_interface_set_mac_addr,
+ .ndo_change_mtu = batadv_interface_change_mtu,
+ .ndo_start_xmit = batadv_interface_tx,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_add_slave = batadv_softif_slave_add,
+ .ndo_del_slave = batadv_softif_slave_del,
+};
+
+/**
+ * batadv_softif_free - Destructor of batadv_soft_interface
+ * @dev: Device to cleanup and remove
+ */
+static void batadv_softif_free(struct net_device *dev)
+{
+ batadv_debugfs_del_meshif(dev);
+ batadv_mesh_free(dev);
+
+ /* some scheduled RCU callbacks need the bat_priv struct to accomplish
+ * their tasks. Wait for them all to be finished before freeing the
+ * netdev and its private data (bat_priv)
+ */
+ rcu_barrier();
+
+ free_netdev(dev);
+}
+
+/**
+ * batadv_softif_init_early - early stage initialization of soft interface
+ * @dev: allocated (not yet registered) network device to modify
+ */
+static void batadv_softif_init_early(struct net_device *dev)
+{
+ struct batadv_priv *priv = netdev_priv(dev);
+
+ ether_setup(dev);
+
+ dev->netdev_ops = &batadv_netdev_ops;
+ dev->destructor = batadv_softif_free;
+ dev->tx_queue_len = 0;
+
+ /* can't call min_mtu, because the needed variables
+ * have not been initialized yet
+ */
+ dev->mtu = ETH_DATA_LEN;
+ /* reserve more space in the skbuff for our header */
+ dev->hard_header_len = BATADV_HEADER_LEN;
+
+ /* generate random address */
+ eth_hw_addr_random(dev);
+
+ SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
+
+ memset(priv, 0, sizeof(*priv));
+}
+
+struct net_device *batadv_softif_create(const char *name)
+{
+ struct net_device *soft_iface;
+ int ret;
+
+ soft_iface = alloc_netdev(sizeof(struct batadv_priv), name,
+ batadv_softif_init_early);
+ if (!soft_iface)
+ return NULL;
+
+ soft_iface->rtnl_link_ops = &batadv_link_ops;
+
+ ret = register_netdevice(soft_iface);
+ if (ret < 0) {
+ pr_err("Unable to register the batman interface '%s': %i\n",
+ name, ret);
+ free_netdev(soft_iface);
+ return NULL;
+ }
+
+ return soft_iface;
+}
+
+/**
+ * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs
+ * @soft_iface: the to-be-removed batman-adv interface
+ */
+void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
- batadv_mesh_free(soft_iface);
queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
}
+/**
+ * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink
+ * @soft_iface: the to-be-removed batman-adv interface
+ * @head: list pointer
+ */
+static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
+ struct list_head *head)
+{
+ struct batadv_hard_iface *hard_iface;
+
+ list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->soft_iface == soft_iface)
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_KEEP);
+ }
+
+ batadv_sysfs_del_meshif(soft_iface);
+ unregister_netdevice_queue(soft_iface, head);
+}
+
int batadv_softif_is_valid(const struct net_device *net_dev)
{
if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
@@ -591,6 +684,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev)
return 0;
}
+struct rtnl_link_ops batadv_link_ops __read_mostly = {
+ .kind = "batadv",
+ .priv_size = sizeof(struct batadv_priv),
+ .setup = batadv_softif_init_early,
+ .dellink = batadv_softif_destroy_netlink,
+};
+
/* ethtool */
static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
@@ -662,6 +762,17 @@ static const struct {
{ "dat_put_rx" },
{ "dat_cached_reply_tx" },
#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+ { "nc_code" },
+ { "nc_code_bytes" },
+ { "nc_recode" },
+ { "nc_recode_bytes" },
+ { "nc_buffer" },
+ { "nc_decode" },
+ { "nc_decode_bytes" },
+ { "nc_decode_failed" },
+ { "nc_sniffed" },
+#endif
};
static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 43182e5e603a..2f2472c2ea0d 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -25,7 +25,8 @@ void batadv_interface_rx(struct net_device *soft_iface,
struct sk_buff *skb, struct batadv_hard_iface *recv_if,
int hdr_size, struct batadv_orig_node *orig_node);
struct net_device *batadv_softif_create(const char *name);
-void batadv_softif_destroy(struct net_device *soft_iface);
+void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
int batadv_softif_is_valid(const struct net_device *net_dev);
+extern struct rtnl_link_ops batadv_link_ops;
#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index afbba319d73a..929e304dacb2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -442,6 +442,9 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
#ifdef CONFIG_BATMAN_ADV_DEBUG
BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);
#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL);
+#endif
static struct batadv_attribute *batadv_mesh_attrs[] = {
&batadv_attr_aggregated_ogms,
@@ -464,6 +467,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
#ifdef CONFIG_BATMAN_ADV_DEBUG
&batadv_attr_log_level,
#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+ &batadv_attr_network_coding,
+#endif
NULL,
};
@@ -576,19 +582,18 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
(strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
goto out;
- if (!rtnl_trylock()) {
- ret = -ERESTARTSYS;
- goto out;
- }
+ rtnl_lock();
if (status_tmp == BATADV_IF_NOT_IN_USE) {
- batadv_hardif_disable_interface(hard_iface);
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_AUTO);
goto unlock;
}
/* if the interface already is in use */
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
- batadv_hardif_disable_interface(hard_iface);
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_AUTO);
ret = batadv_hardif_enable_interface(hard_iface, buff);
@@ -688,15 +693,10 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
enum batadv_uev_action action, const char *data)
{
int ret = -ENOMEM;
- struct batadv_hard_iface *primary_if;
struct kobject *bat_kobj;
char *uevent_env[4] = { NULL, NULL, NULL, NULL };
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
- bat_kobj = &primary_if->soft_iface->dev.kobj;
+ bat_kobj = &bat_priv->soft_iface->dev.kobj;
uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) +
strlen(batadv_uev_type_str[type]) + 1,
@@ -732,9 +732,6 @@ out:
kfree(uevent_env[1]);
kfree(uevent_env[2]);
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
-
if (ret)
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7abee19567e9..9e8748575845 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -144,7 +144,12 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
struct batadv_tt_orig_list_entry *orig_entry;
orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
- batadv_orig_node_free_ref(orig_entry->orig_node);
+
+ /* We are in an rcu callback here, therefore we cannot use
+ * batadv_orig_node_free_ref() and its call_rcu():
+ * An rcu_barrier() wouldn't wait for that to finish
+ */
+ batadv_orig_node_free_ref_now(orig_entry->orig_node);
kfree(orig_entry);
}
@@ -385,25 +390,19 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv,
int *packet_buff_len,
int min_packet_len)
{
- struct batadv_hard_iface *primary_if;
int req_len;
- primary_if = batadv_primary_if_get_selected(bat_priv);
-
req_len = min_packet_len;
req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));
/* if we have too many changes for one packet don't send any
* and wait for the tt table request which will be fragmented
*/
- if ((!primary_if) || (req_len > primary_if->soft_iface->mtu))
+ if (req_len > bat_priv->soft_iface->mtu)
req_len = min_packet_len;
batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
min_packet_len, req_len);
-
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
}
static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv,
@@ -908,7 +907,7 @@ out_remove:
/* remove address from local hash if present */
local_flags = batadv_tt_local_remove(bat_priv, tt_addr,
"global tt received",
- !!(flags & BATADV_TT_CLIENT_ROAM));
+ flags & BATADV_TT_CLIENT_ROAM);
tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI;
if (!(flags & BATADV_TT_CLIENT_ROAM))
@@ -1580,7 +1579,7 @@ static int batadv_tt_global_valid(const void *entry_ptr,
static struct sk_buff *
batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
struct batadv_hashtable *hash,
- struct batadv_hard_iface *primary_if,
+ struct batadv_priv *bat_priv,
int (*valid_cb)(const void *, const void *),
void *cb_data)
{
@@ -1594,8 +1593,8 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
uint32_t i;
size_t len;
- if (tt_query_size + tt_len > primary_if->soft_iface->mtu) {
- tt_len = primary_if->soft_iface->mtu - tt_query_size;
+ if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) {
+ tt_len = bat_priv->soft_iface->mtu - tt_query_size;
tt_len -= tt_len % sizeof(struct batadv_tt_change);
}
tt_tot = tt_len / sizeof(struct batadv_tt_change);
@@ -1715,7 +1714,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
{
struct batadv_orig_node *req_dst_orig_node;
struct batadv_orig_node *res_dst_orig_node = NULL;
- struct batadv_hard_iface *primary_if = NULL;
uint8_t orig_ttvn, req_ttvn, ttvn;
int ret = false;
unsigned char *tt_buff;
@@ -1740,10 +1738,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
if (!res_dst_orig_node)
goto out;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
req_ttvn = tt_request->ttvn;
@@ -1791,7 +1785,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
skb = batadv_tt_response_fill_table(tt_len, ttvn,
bat_priv->tt.global_hash,
- primary_if,
+ bat_priv,
batadv_tt_global_valid,
req_dst_orig_node);
if (!skb)
@@ -1828,8 +1822,6 @@ out:
batadv_orig_node_free_ref(res_dst_orig_node);
if (req_dst_orig_node)
batadv_orig_node_free_ref(req_dst_orig_node);
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
if (!ret)
kfree_skb(skb);
return ret;
@@ -1907,7 +1899,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
skb = batadv_tt_response_fill_table(tt_len, ttvn,
bat_priv->tt.local_hash,
- primary_if,
+ bat_priv,
batadv_tt_local_valid_entry,
NULL);
if (!skb)
@@ -2528,7 +2520,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
if (!tt_global_entry)
goto out;
- ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM);
+ ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;
batadv_tt_global_entry_free_ref(tt_global_entry);
out:
return ret;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 4cd87a0b5b80..aba8364c3689 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -128,6 +128,10 @@ struct batadv_hard_iface {
* @bond_list: list of bonding candidates
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
+ * @in_coding_list: list of nodes this orig can hear
+ * @out_coding_list: list of nodes that can hear this orig
+ * @in_coding_list_lock: protects in_coding_list
+ * @out_coding_list_lock: protects out_coding_list
*/
struct batadv_orig_node {
uint8_t orig[ETH_ALEN];
@@ -171,6 +175,12 @@ struct batadv_orig_node {
struct list_head bond_list;
atomic_t refcount;
struct rcu_head rcu;
+#ifdef CONFIG_BATMAN_ADV_NC
+ struct list_head in_coding_list;
+ struct list_head out_coding_list;
+ spinlock_t in_coding_list_lock; /* Protects in_coding_list */
+ spinlock_t out_coding_list_lock; /* Protects out_coding_list */
+#endif
};
/**
@@ -265,6 +275,17 @@ struct batadv_bcast_duplist_entry {
* @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter
* @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet
* counter
+ * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter
+ * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter
+ * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter
+ * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter
+ * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding
+ * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter
+ * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter
+ * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet
+ * counter
+ * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc
+ * mode.
* @BATADV_CNT_NUM: number of traffic counters
*/
enum batadv_counters {
@@ -292,6 +313,17 @@ enum batadv_counters {
BATADV_CNT_DAT_PUT_RX,
BATADV_CNT_DAT_CACHED_REPLY_TX,
#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+ BATADV_CNT_NC_CODE,
+ BATADV_CNT_NC_CODE_BYTES,
+ BATADV_CNT_NC_RECODE,
+ BATADV_CNT_NC_RECODE_BYTES,
+ BATADV_CNT_NC_BUFFER,
+ BATADV_CNT_NC_DECODE,
+ BATADV_CNT_NC_DECODE_BYTES,
+ BATADV_CNT_NC_DECODE_FAILED,
+ BATADV_CNT_NC_SNIFFED,
+#endif
BATADV_CNT_NUM,
};
@@ -428,6 +460,35 @@ struct batadv_priv_dat {
#endif
/**
+ * struct batadv_priv_nc - per mesh interface network coding private data
+ * @work: work queue callback item for cleanup
+ * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
+ * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq
+ * @max_fwd_delay: maximum packet forward delay to allow coding of packets
+ * @max_buffer_time: buffer time for sniffed packets used for decoding
+ * @timestamp_fwd_flush: timestamp of last forward packet queue flush
+ * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge
+ * @coding_hash: Hash table used to buffer skbs while waiting for another
+ * incoming skb to code it with. Skbs are added to the buffer just before being
+ * forwarded in routing.c
+ * @decoding_hash: Hash table used to buffer skbs that might be needed to decode
+ * a received coded skb. The buffer is used for 1) skbs arriving on the
+ * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs
+ * forwarded by batman-adv.
+ */
+struct batadv_priv_nc {
+ struct delayed_work work;
+ struct dentry *debug_dir;
+ u8 min_tq;
+ u32 max_fwd_delay;
+ u32 max_buffer_time;
+ unsigned long timestamp_fwd_flush;
+ unsigned long timestamp_sniffed_purge;
+ struct batadv_hashtable *coding_hash;
+ struct batadv_hashtable *decoding_hash;
+};
+
+/**
* struct batadv_priv - per mesh interface data
* @mesh_state: current status of the mesh (inactive/active/deactivating)
* @soft_iface: net device which holds this struct as private data
@@ -470,6 +531,8 @@ struct batadv_priv_dat {
* @tt: translation table data
* @vis: vis data
* @dat: distributed arp table data
+ * @network_coding: bool indicating whether network coding is enabled
+ * @nc: network coding data
*/
struct batadv_priv {
atomic_t mesh_state;
@@ -522,6 +585,10 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_DAT
struct batadv_priv_dat dat;
#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+ atomic_t network_coding;
+ struct batadv_priv_nc nc;
+#endif /* CONFIG_BATMAN_ADV_NC */
};
/**
@@ -702,6 +769,75 @@ struct batadv_tt_roam_node {
};
/**
+ * struct batadv_nc_node - network coding node
+ * @list: next and prev pointer for the list handling
+ * @addr: the node's mac address
+ * @refcount: number of contexts the object is used by
+ * @rcu: struct used for freeing in an RCU-safe manner
+ * @orig_node: pointer to corresponding orig node struct
+ * @last_seen: timestamp of last ogm received from this node
+ */
+struct batadv_nc_node {
+ struct list_head list;
+ uint8_t addr[ETH_ALEN];
+ atomic_t refcount;
+ struct rcu_head rcu;
+ struct batadv_orig_node *orig_node;
+ unsigned long last_seen;
+};
+
+/**
+ * struct batadv_nc_path - network coding path
+ * @hash_entry: next and prev pointer for the list handling
+ * @rcu: struct used for freeing in an RCU-safe manner
+ * @refcount: number of contexts the object is used by
+ * @packet_list: list of buffered packets for this path
+ * @packet_list_lock: access lock for packet list
+ * @next_hop: next hop (destination) of path
+ * @prev_hop: previous hop (source) of path
+ * @last_valid: timestamp for last validation of path
+ */
+struct batadv_nc_path {
+ struct hlist_node hash_entry;
+ struct rcu_head rcu;
+ atomic_t refcount;
+ struct list_head packet_list;
+ spinlock_t packet_list_lock; /* Protects packet_list */
+ uint8_t next_hop[ETH_ALEN];
+ uint8_t prev_hop[ETH_ALEN];
+ unsigned long last_valid;
+};
+
+/**
+ * struct batadv_nc_packet - network coding packet used when coding and
+ * decoding packets
+ * @list: next and prev pointer for the list handling
+ * @packet_id: crc32 checksum of skb data
+ * @timestamp: field containing the info when the packet was added to path
+ * @neigh_node: pointer to original next hop neighbor of skb
+ * @skb: skb which can be encoded or used for decoding
+ * @nc_path: pointer to path this nc packet is attached to
+ */
+struct batadv_nc_packet {
+ struct list_head list;
+ __be32 packet_id;
+ unsigned long timestamp;
+ struct batadv_neigh_node *neigh_node;
+ struct sk_buff *skb;
+ struct batadv_nc_path *nc_path;
+};
+
+/**
+ * struct batadv_skb_cb - control buffer structure used to store private data
+ * relevant to batman-adv in the skb->cb buffer in skbs.
+ * @decoded: Marks a skb as decoded, which is checked when searching for coding
+ * opportunities in network-coding.c
+ */
+struct batadv_skb_cb {
+ bool decoded;
+};
+
+/**
* struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
* @list: list node for batadv_socket_client::queue_list
* @send_time: execution time for delayed_work (packet sending)
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 50e079f00be6..0bb3b5982f94 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -122,7 +122,7 @@ batadv_frag_search_packet(struct list_head *head,
{
struct batadv_frag_packet_list_entry *tfp;
struct batadv_unicast_frag_packet *tmp_up = NULL;
- int is_head_tmp, is_head;
+ bool is_head_tmp, is_head;
uint16_t search_seqno;
if (up->flags & BATADV_UNI_FRAG_HEAD)
@@ -130,7 +130,7 @@ batadv_frag_search_packet(struct list_head *head,
else
search_seqno = ntohs(up->seqno)-1;
- is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD);
+ is_head = up->flags & BATADV_UNI_FRAG_HEAD;
list_for_each_entry(tfp, head, list) {
if (!tfp->skb)
@@ -142,7 +142,7 @@ batadv_frag_search_packet(struct list_head *head,
tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data;
if (tfp->seqno == search_seqno) {
- is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD);
+ is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD;
if (is_head_tmp != is_head)
return tfp;
else
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 6a1e646be96d..1625e5793a89 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -149,7 +149,7 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq,
hlist_for_each_entry(entry, if_list, list) {
if (entry->primary)
- seq_printf(seq, "PRIMARY, ");
+ seq_puts(seq, "PRIMARY, ");
else
seq_printf(seq, "SEC %pM, ", entry->addr);
}
@@ -207,7 +207,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,
if (batadv_compare_eth(entry->addr, packet->vis_orig))
batadv_vis_data_read_prim_sec(seq, list);
- seq_printf(seq, "\n");
+ seq_puts(seq, "\n");
}
}
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index eb0f4b16ff09..17f33a62f6db 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -397,13 +397,12 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
if (ctrl) {
u8 *assoc;
- assoc = kzalloc(assoc_len, GFP_KERNEL);
+ assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL);
if (!assoc) {
amp_ctrl_put(ctrl);
return -ENOMEM;
}
- memcpy(assoc, rsp->amp_assoc, assoc_len);
ctrl->assoc = assoc;
ctrl->assoc_len = assoc_len;
ctrl->assoc_rem_len = assoc_len;
@@ -472,13 +471,12 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req);
u8 *assoc;
- assoc = kzalloc(assoc_len, GFP_KERNEL);
+ assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
if (!assoc) {
amp_ctrl_put(ctrl);
return -ENOMEM;
}
- memcpy(assoc, req->amp_assoc, assoc_len);
ctrl->assoc = assoc;
ctrl->assoc_len = assoc_len;
ctrl->assoc_rem_len = assoc_len;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 0d1b08cc76e1..9096137c889c 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops)
}
EXPORT_SYMBOL(bt_sock_register);
-int bt_sock_unregister(int proto)
+void bt_sock_unregister(int proto)
{
- int err = 0;
-
if (proto < 0 || proto >= BT_MAX_PROTO)
- return -EINVAL;
+ return;
write_lock(&bt_proto_lock);
-
- if (!bt_proto[proto])
- err = -ENOENT;
- else
- bt_proto[proto] = NULL;
-
+ bt_proto[proto] = NULL;
write_unlock(&bt_proto_lock);
-
- return err;
}
EXPORT_SYMBOL(bt_sock_unregister);
@@ -422,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
return bt_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
@@ -617,7 +609,7 @@ static int bt_seq_open(struct inode *inode, struct file *file)
struct bt_sock_list *sk_list;
struct bt_seq_state *s;
- sk_list = PDE(inode)->data;
+ sk_list = PDE_DATA(inode);
s = __seq_open_private(file, &bt_seq_ops,
sizeof(struct bt_seq_state));
if (!s)
@@ -627,26 +619,21 @@ static int bt_seq_open(struct inode *inode, struct file *file)
return 0;
}
-int bt_procfs_init(struct module* module, struct net *net, const char *name,
+static const struct file_operations bt_fops = {
+ .open = bt_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private
+};
+
+int bt_procfs_init(struct net *net, const char *name,
struct bt_sock_list* sk_list,
int (* seq_show)(struct seq_file *, void *))
{
- struct proc_dir_entry * pde;
-
sk_list->custom_seq_show = seq_show;
- sk_list->fops.owner = module;
- sk_list->fops.open = bt_seq_open;
- sk_list->fops.read = seq_read;
- sk_list->fops.llseek = seq_lseek;
- sk_list->fops.release = seq_release_private;
-
- pde = proc_create(name, 0, net->proc_net, &sk_list->fops);
- if (!pde)
+ if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list))
return -ENOMEM;
-
- pde->data = sk_list;
-
return 0;
}
@@ -655,7 +642,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
remove_proc_entry(name, net->proc_net);
}
#else
-int bt_procfs_init(struct module* module, struct net *net, const char *name,
+int bt_procfs_init(struct net *net, const char *name,
struct bt_sock_list* sk_list,
int (* seq_show)(struct seq_file *, void *))
{
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index e58c8b32589c..4b488ec26105 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb)
struct ethhdr *eh = (void *) skb->data;
u16 proto = ntohs(eh->h_proto);
- if (proto >= 1536)
+ if (proto >= ETH_P_802_3_MIN)
return proto;
if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF))
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index e7154a58465f..5f051290daba 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -234,7 +234,7 @@ int __init bnep_sock_init(void)
goto error;
}
- err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL);
+ err = bt_procfs_init(&init_net, "bnep", &bnep_sk_list, NULL);
if (err < 0) {
BT_ERR("Failed to create BNEP proc file");
bt_sock_unregister(BTPROTO_BNEP);
@@ -253,8 +253,6 @@ error:
void __exit bnep_sock_cleanup(void)
{
bt_procfs_cleanup(&init_net, "bnep");
- if (bt_sock_unregister(BTPROTO_BNEP) < 0)
- BT_ERR("Can't unregister BNEP socket");
-
+ bt_sock_unregister(BTPROTO_BNEP);
proto_unregister(&bnep_proto);
}
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index a4a9d4b6816c..cd75e4d64b90 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -539,7 +539,7 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
static int cmtp_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, cmtp_proc_show, PDE(inode)->data);
+ return single_open(file, cmtp_proc_show, PDE_DATA(inode));
}
static const struct file_operations cmtp_proc_fops = {
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 1c57482112b6..d82787d417bd 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -245,7 +245,7 @@ int cmtp_init_sockets(void)
goto error;
}
- err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL);
+ err = bt_procfs_init(&init_net, "cmtp", &cmtp_sk_list, NULL);
if (err < 0) {
BT_ERR("Failed to create CMTP proc file");
bt_sock_unregister(BTPROTO_HIDP);
@@ -264,8 +264,6 @@ error:
void cmtp_cleanup_sockets(void)
{
bt_procfs_cleanup(&init_net, "cmtp");
- if (bt_sock_unregister(BTPROTO_CMTP) < 0)
- BT_ERR("Can't unregister CMTP socket");
-
+ bt_sock_unregister(BTPROTO_CMTP);
proto_unregister(&cmtp_proto);
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 4925a02ae7e4..6c7f36379722 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -117,7 +117,17 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn)
hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);
}
-void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
+static void hci_reject_sco(struct hci_conn *conn)
+{
+ struct hci_cp_reject_sync_conn_req cp;
+
+ cp.reason = HCI_ERROR_REMOTE_USER_TERM;
+ bacpy(&cp.bdaddr, &conn->dst);
+
+ hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp);
+}
+
+void hci_disconnect(struct hci_conn *conn, __u8 reason)
{
struct hci_cp_disconnect cp;
@@ -253,7 +263,7 @@ static void hci_conn_disconnect(struct hci_conn *conn)
hci_amp_disconn(conn, reason);
break;
default:
- hci_acl_disconn(conn, reason);
+ hci_disconnect(conn, reason);
break;
}
}
@@ -276,6 +286,8 @@ static void hci_conn_timeout(struct work_struct *work)
hci_acl_create_connection_cancel(conn);
else if (conn->type == LE_LINK)
hci_le_create_connection_cancel(conn);
+ } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
+ hci_reject_sco(conn);
}
break;
case BT_CONFIG:
@@ -398,8 +410,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
if (hdev->notify)
hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
- atomic_set(&conn->devref, 0);
-
hci_conn_init_sysfs(conn);
return conn;
@@ -433,7 +443,7 @@ int hci_conn_del(struct hci_conn *conn)
struct hci_conn *acl = conn->link;
if (acl) {
acl->link = NULL;
- hci_conn_put(acl);
+ hci_conn_drop(acl);
}
}
@@ -448,12 +458,11 @@ int hci_conn_del(struct hci_conn *conn)
skb_queue_purge(&conn->data_q);
- hci_conn_put_device(conn);
+ hci_conn_del_sysfs(conn);
hci_dev_put(hdev);
- if (conn->handle == 0)
- kfree(conn);
+ hci_conn_put(conn);
return 0;
}
@@ -565,7 +574,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
if (!sco) {
sco = hci_conn_add(hdev, type, dst);
if (!sco) {
- hci_conn_put(acl);
+ hci_conn_drop(acl);
return ERR_PTR(-ENOMEM);
}
}
@@ -835,19 +844,6 @@ void hci_conn_check_pending(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
-void hci_conn_hold_device(struct hci_conn *conn)
-{
- atomic_inc(&conn->devref);
-}
-EXPORT_SYMBOL(hci_conn_hold_device);
-
-void hci_conn_put_device(struct hci_conn *conn)
-{
- if (atomic_dec_and_test(&conn->devref))
- hci_conn_del_sysfs(conn);
-}
-EXPORT_SYMBOL(hci_conn_put_device);
-
int hci_get_conn_list(void __user *arg)
{
struct hci_conn *c;
@@ -980,7 +976,7 @@ void hci_chan_del(struct hci_chan *chan)
synchronize_rcu();
- hci_conn_put(conn);
+ hci_conn_drop(conn);
skb_queue_purge(&chan->data_q);
kfree(chan);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 60793e7b768b..db7de80b88a2 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event)
/* ---- HCI requests ---- */
-void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result)
+static void hci_req_sync_complete(struct hci_dev *hdev, u8 result)
{
- BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result);
-
- /* If this is the init phase check if the completed command matches
- * the last init command, and if not just return.
- */
- if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) {
- struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
- u16 opcode = __le16_to_cpu(sent->opcode);
- struct sk_buff *skb;
-
- /* Some CSR based controllers generate a spontaneous
- * reset complete event during init and any pending
- * command will never be completed. In such a case we
- * need to resend whatever was the last sent
- * command.
- */
-
- if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET)
- return;
-
- skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC);
- if (skb) {
- skb_queue_head(&hdev->cmd_q, skb);
- queue_work(hdev->workqueue, &hdev->cmd_work);
- }
-
- return;
- }
+ BT_DBG("%s result 0x%2.2x", hdev->name, result);
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
@@ -106,22 +79,158 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
}
}
+static struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
+ u8 event)
+{
+ struct hci_ev_cmd_complete *ev;
+ struct hci_event_hdr *hdr;
+ struct sk_buff *skb;
+
+ hci_dev_lock(hdev);
+
+ skb = hdev->recv_evt;
+ hdev->recv_evt = NULL;
+
+ hci_dev_unlock(hdev);
+
+ if (!skb)
+ return ERR_PTR(-ENODATA);
+
+ if (skb->len < sizeof(*hdr)) {
+ BT_ERR("Too short HCI event");
+ goto failed;
+ }
+
+ hdr = (void *) skb->data;
+ skb_pull(skb, HCI_EVENT_HDR_SIZE);
+
+ if (event) {
+ if (hdr->evt != event)
+ goto failed;
+ return skb;
+ }
+
+ if (hdr->evt != HCI_EV_CMD_COMPLETE) {
+ BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt);
+ goto failed;
+ }
+
+ if (skb->len < sizeof(*ev)) {
+ BT_ERR("Too short cmd_complete event");
+ goto failed;
+ }
+
+ ev = (void *) skb->data;
+ skb_pull(skb, sizeof(*ev));
+
+ if (opcode == __le16_to_cpu(ev->opcode))
+ return skb;
+
+ BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode,
+ __le16_to_cpu(ev->opcode));
+
+failed:
+ kfree_skb(skb);
+ return ERR_PTR(-ENODATA);
+}
+
+struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u8 event, u32 timeout)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ struct hci_request req;
+ int err = 0;
+
+ BT_DBG("%s", hdev->name);
+
+ hci_req_init(&req, hdev);
+
+ hci_req_add_ev(&req, opcode, plen, param, event);
+
+ hdev->req_status = HCI_REQ_PEND;
+
+ err = hci_req_run(&req, hci_req_sync_complete);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ add_wait_queue(&hdev->req_wait_q, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ schedule_timeout(timeout);
+
+ remove_wait_queue(&hdev->req_wait_q, &wait);
+
+ if (signal_pending(current))
+ return ERR_PTR(-EINTR);
+
+ switch (hdev->req_status) {
+ case HCI_REQ_DONE:
+ err = -bt_to_errno(hdev->req_result);
+ break;
+
+ case HCI_REQ_CANCELED:
+ err = -hdev->req_result;
+ break;
+
+ default:
+ err = -ETIMEDOUT;
+ break;
+ }
+
+ hdev->req_status = hdev->req_result = 0;
+
+ BT_DBG("%s end: err %d", hdev->name, err);
+
+ if (err < 0)
+ return ERR_PTR(err);
+
+ return hci_get_cmd_complete(hdev, opcode, event);
+}
+EXPORT_SYMBOL(__hci_cmd_sync_ev);
+
+struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout)
+{
+ return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout);
+}
+EXPORT_SYMBOL(__hci_cmd_sync);
+
/* Execute request and wait for completion. */
-static int __hci_request(struct hci_dev *hdev,
- void (*req)(struct hci_dev *hdev, unsigned long opt),
- unsigned long opt, __u32 timeout)
+static int __hci_req_sync(struct hci_dev *hdev,
+ void (*func)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, __u32 timeout)
{
+ struct hci_request req;
DECLARE_WAITQUEUE(wait, current);
int err = 0;
BT_DBG("%s start", hdev->name);
+ hci_req_init(&req, hdev);
+
hdev->req_status = HCI_REQ_PEND;
+ func(&req, opt);
+
+ err = hci_req_run(&req, hci_req_sync_complete);
+ if (err < 0) {
+ hdev->req_status = 0;
+
+ /* ENODATA means the HCI request command queue is empty.
+ * This can happen when a request with conditionals doesn't
+ * trigger any commands to be sent. This is normal behavior
+ * and should not trigger an error return.
+ */
+ if (err == -ENODATA)
+ return 0;
+
+ return err;
+ }
+
add_wait_queue(&hdev->req_wait_q, &wait);
set_current_state(TASK_INTERRUPTIBLE);
- req(hdev, opt);
schedule_timeout(timeout);
remove_wait_queue(&hdev->req_wait_q, &wait);
@@ -150,9 +259,10 @@ static int __hci_request(struct hci_dev *hdev,
return err;
}
-static int hci_request(struct hci_dev *hdev,
- void (*req)(struct hci_dev *hdev, unsigned long opt),
- unsigned long opt, __u32 timeout)
+static int hci_req_sync(struct hci_dev *hdev,
+ void (*req)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, __u32 timeout)
{
int ret;
@@ -161,75 +271,66 @@ static int hci_request(struct hci_dev *hdev,
/* Serialize all requests */
hci_req_lock(hdev);
- ret = __hci_request(hdev, req, opt, timeout);
+ ret = __hci_req_sync(hdev, req, opt, timeout);
hci_req_unlock(hdev);
return ret;
}
-static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_reset_req(struct hci_request *req, unsigned long opt)
{
- BT_DBG("%s %ld", hdev->name, opt);
+ BT_DBG("%s %ld", req->hdev->name, opt);
/* Reset device */
- set_bit(HCI_RESET, &hdev->flags);
- hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
+ set_bit(HCI_RESET, &req->hdev->flags);
+ hci_req_add(req, HCI_OP_RESET, 0, NULL);
}
-static void bredr_init(struct hci_dev *hdev)
+static void bredr_init(struct hci_request *req)
{
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
+ req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
/* Read Local Supported Features */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
/* Read Local Version */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
+
+ /* Read BD Address */
+ hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);
}
-static void amp_init(struct hci_dev *hdev)
+static void amp_init(struct hci_request *req)
{
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
+ req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
/* Read Local Version */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
/* Read Local AMP Info */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
/* Read Data Blk size */
- hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);
}
-static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_init1_req(struct hci_request *req, unsigned long opt)
{
- struct sk_buff *skb;
+ struct hci_dev *hdev = req->hdev;
BT_DBG("%s %ld", hdev->name, opt);
- /* Driver initialization */
-
- /* Special commands */
- while ((skb = skb_dequeue(&hdev->driver_init))) {
- bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
- skb->dev = (void *) hdev;
-
- skb_queue_tail(&hdev->cmd_q, skb);
- queue_work(hdev->workqueue, &hdev->cmd_work);
- }
- skb_queue_purge(&hdev->driver_init);
-
/* Reset */
if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks))
- hci_reset_req(hdev, 0);
+ hci_reset_req(req, 0);
switch (hdev->dev_type) {
case HCI_BREDR:
- bredr_init(hdev);
+ bredr_init(req);
break;
case HCI_AMP:
- amp_init(hdev);
+ amp_init(req);
break;
default:
@@ -238,44 +339,352 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
}
}
-static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
+static void bredr_setup(struct hci_request *req)
+{
+ __le16 param;
+ __u8 flt_type;
+
+ /* Read Buffer Size (ACL mtu, max pkt, etc.) */
+ hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
+
+ /* Read Class of Device */
+ hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
+
+ /* Read Local Name */
+ hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL);
+
+ /* Read Voice Setting */
+ hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL);
+
+ /* Clear Event Filters */
+ flt_type = HCI_FLT_CLEAR_ALL;
+ hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
+
+ /* Connection accept timeout ~20 secs */
+ param = __constant_cpu_to_le16(0x7d00);
+ hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
+
+ /* Read page scan parameters */
+ if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) {
+ hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
+ }
+}
+
+static void le_setup(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ /* Read LE Buffer Size */
+ hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
+
+ /* Read LE Local Supported Features */
+ hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
+
+ /* Read LE Advertising Channel TX Power */
+ hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
+
+ /* Read LE White List Size */
+ hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
+
+ /* Read LE Supported States */
+ hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
+
+ /* LE-only controllers have LE implicitly enabled */
+ if (!lmp_bredr_capable(hdev))
+ set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
+}
+
+static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
+{
+ if (lmp_ext_inq_capable(hdev))
+ return 0x02;
+
+ if (lmp_inq_rssi_capable(hdev))
+ return 0x01;
+
+ if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
+ hdev->lmp_subver == 0x0757)
+ return 0x01;
+
+ if (hdev->manufacturer == 15) {
+ if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
+ return 0x01;
+ if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
+ return 0x01;
+ if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
+ return 0x01;
+ }
+
+ if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
+ hdev->lmp_subver == 0x1805)
+ return 0x01;
+
+ return 0x00;
+}
+
+static void hci_setup_inquiry_mode(struct hci_request *req)
+{
+ u8 mode;
+
+ mode = hci_get_inquiry_mode(req->hdev);
+
+ hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
+}
+
+static void hci_setup_event_mask(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ /* The second byte is 0xff instead of 0x9f (two reserved bits
+ * disabled) since a Broadcom 1.2 dongle doesn't respond to the
+ * command otherwise.
+ */
+ u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
+
+ /* CSR 1.1 dongles does not accept any bitfield so don't try to set
+ * any event mask for pre 1.2 devices.
+ */
+ if (hdev->hci_ver < BLUETOOTH_VER_1_2)
+ return;
+
+ if (lmp_bredr_capable(hdev)) {
+ events[4] |= 0x01; /* Flow Specification Complete */
+ events[4] |= 0x02; /* Inquiry Result with RSSI */
+ events[4] |= 0x04; /* Read Remote Extended Features Complete */
+ events[5] |= 0x08; /* Synchronous Connection Complete */
+ events[5] |= 0x10; /* Synchronous Connection Changed */
+ }
+
+ if (lmp_inq_rssi_capable(hdev))
+ events[4] |= 0x02; /* Inquiry Result with RSSI */
+
+ if (lmp_sniffsubr_capable(hdev))
+ events[5] |= 0x20; /* Sniff Subrating */
+
+ if (lmp_pause_enc_capable(hdev))
+ events[5] |= 0x80; /* Encryption Key Refresh Complete */
+
+ if (lmp_ext_inq_capable(hdev))
+ events[5] |= 0x40; /* Extended Inquiry Result */
+
+ if (lmp_no_flush_capable(hdev))
+ events[7] |= 0x01; /* Enhanced Flush Complete */
+
+ if (lmp_lsto_capable(hdev))
+ events[6] |= 0x80; /* Link Supervision Timeout Changed */
+
+ if (lmp_ssp_capable(hdev)) {
+ events[6] |= 0x01; /* IO Capability Request */
+ events[6] |= 0x02; /* IO Capability Response */
+ events[6] |= 0x04; /* User Confirmation Request */
+ events[6] |= 0x08; /* User Passkey Request */
+ events[6] |= 0x10; /* Remote OOB Data Request */
+ events[6] |= 0x20; /* Simple Pairing Complete */
+ events[7] |= 0x04; /* User Passkey Notification */
+ events[7] |= 0x08; /* Keypress Notification */
+ events[7] |= 0x10; /* Remote Host Supported
+ * Features Notification
+ */
+ }
+
+ if (lmp_le_capable(hdev))
+ events[7] |= 0x20; /* LE Meta-Event */
+
+ hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
+
+ if (lmp_le_capable(hdev)) {
+ memset(events, 0, sizeof(events));
+ events[0] = 0x1f;
+ hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK,
+ sizeof(events), events);
+ }
+}
+
+static void hci_init2_req(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ if (lmp_bredr_capable(hdev))
+ bredr_setup(req);
+
+ if (lmp_le_capable(hdev))
+ le_setup(req);
+
+ hci_setup_event_mask(req);
+
+ if (hdev->hci_ver > BLUETOOTH_VER_1_1)
+ hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
+
+ if (lmp_ssp_capable(hdev)) {
+ if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+ u8 mode = 0x01;
+ hci_req_add(req, HCI_OP_WRITE_SSP_MODE,
+ sizeof(mode), &mode);
+ } else {
+ struct hci_cp_write_eir cp;
+
+ memset(hdev->eir, 0, sizeof(hdev->eir));
+ memset(&cp, 0, sizeof(cp));
+
+ hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+ }
+ }
+
+ if (lmp_inq_rssi_capable(hdev))
+ hci_setup_inquiry_mode(req);
+
+ if (lmp_inq_tx_pwr_capable(hdev))
+ hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
+
+ if (lmp_ext_feat_capable(hdev)) {
+ struct hci_cp_read_local_ext_features cp;
+
+ cp.page = 0x01;
+ hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES,
+ sizeof(cp), &cp);
+ }
+
+ if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
+ u8 enable = 1;
+ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
+ &enable);
+ }
+}
+
+static void hci_setup_link_policy(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_def_link_policy cp;
+ u16 link_policy = 0;
+
+ if (lmp_rswitch_capable(hdev))
+ link_policy |= HCI_LP_RSWITCH;
+ if (lmp_hold_capable(hdev))
+ link_policy |= HCI_LP_HOLD;
+ if (lmp_sniff_capable(hdev))
+ link_policy |= HCI_LP_SNIFF;
+ if (lmp_park_capable(hdev))
+ link_policy |= HCI_LP_PARK;
+
+ cp.policy = cpu_to_le16(link_policy);
+ hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
+}
+
+static void hci_set_le_support(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_le_host_supported cp;
+
+ /* LE-only devices do not support explicit enablement */
+ if (!lmp_bredr_capable(hdev))
+ return;
+
+ memset(&cp, 0, sizeof(cp));
+
+ if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+ cp.le = 0x01;
+ cp.simul = lmp_le_br_capable(hdev);
+ }
+
+ if (cp.le != lmp_host_le_capable(hdev))
+ hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
+ &cp);
+}
+
+static void hci_init3_req(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+ u8 p;
+
+ /* Only send HCI_Delete_Stored_Link_Key if it is supported */
+ if (hdev->commands[6] & 0x80) {
+ struct hci_cp_delete_stored_link_key cp;
+
+ bacpy(&cp.bdaddr, BDADDR_ANY);
+ cp.delete_all = 0x01;
+ hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY,
+ sizeof(cp), &cp);
+ }
+
+ if (hdev->commands[5] & 0x10)
+ hci_setup_link_policy(req);
+
+ if (lmp_le_capable(hdev)) {
+ hci_set_le_support(req);
+ hci_update_ad(req);
+ }
+
+ /* Read features beyond page 1 if available */
+ for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) {
+ struct hci_cp_read_local_ext_features cp;
+
+ cp.page = p;
+ hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES,
+ sizeof(cp), &cp);
+ }
+}
+
+static int __hci_init(struct hci_dev *hdev)
+{
+ int err;
+
+ err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT);
+ if (err < 0)
+ return err;
+
+ /* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode
+ * BR/EDR/LE type controllers. AMP controllers only need the
+ * first stage init.
+ */
+ if (hdev->dev_type != HCI_BREDR)
+ return 0;
+
+ err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
+ if (err < 0)
+ return err;
+
+ return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT);
+}
+
+static void hci_scan_req(struct hci_request *req, unsigned long opt)
{
__u8 scan = opt;
- BT_DBG("%s %x", hdev->name, scan);
+ BT_DBG("%s %x", req->hdev->name, scan);
/* Inquiry and Page scans */
- hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
-static void hci_auth_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_auth_req(struct hci_request *req, unsigned long opt)
{
__u8 auth = opt;
- BT_DBG("%s %x", hdev->name, auth);
+ BT_DBG("%s %x", req->hdev->name, auth);
/* Authentication */
- hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
+ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
}
-static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_encrypt_req(struct hci_request *req, unsigned long opt)
{
__u8 encrypt = opt;
- BT_DBG("%s %x", hdev->name, encrypt);
+ BT_DBG("%s %x", req->hdev->name, encrypt);
/* Encryption */
- hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
+ hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
}
-static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_linkpol_req(struct hci_request *req, unsigned long opt)
{
__le16 policy = cpu_to_le16(opt);
- BT_DBG("%s %x", hdev->name, policy);
+ BT_DBG("%s %x", req->hdev->name, policy);
/* Default link policy */
- hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
+ hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
}
/* Get HCI device by index.
@@ -512,9 +921,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
return copied;
}
-static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_inq_req(struct hci_request *req, unsigned long opt)
{
struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt;
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_inquiry cp;
BT_DBG("%s", hdev->name);
@@ -526,7 +936,13 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
memcpy(&cp.lap, &ir->lap, 3);
cp.length = ir->length;
cp.num_rsp = ir->num_rsp;
- hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
+}
+
+static int wait_inquiry(void *word)
+{
+ schedule();
+ return signal_pending(current);
}
int hci_inquiry(void __user *arg)
@@ -556,9 +972,17 @@ int hci_inquiry(void __user *arg)
timeo = ir.length * msecs_to_jiffies(2000);
if (do_inquiry) {
- err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo);
+ err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir,
+ timeo);
if (err < 0)
goto done;
+
+ /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is
+ * cleared). If it is interrupted by a signal, return -EINTR.
+ */
+ if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry,
+ TASK_INTERRUPTIBLE))
+ return -EINTR;
}
/* for unlimited number of responses we will use buffer with
@@ -654,39 +1078,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr)
return ad_len;
}
-int hci_update_ad(struct hci_dev *hdev)
+void hci_update_ad(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_le_set_adv_data cp;
u8 len;
- int err;
-
- hci_dev_lock(hdev);
- if (!lmp_le_capable(hdev)) {
- err = -EINVAL;
- goto unlock;
- }
+ if (!lmp_le_capable(hdev))
+ return;
memset(&cp, 0, sizeof(cp));
len = create_ad(hdev, cp.data);
if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0) {
- err = 0;
- goto unlock;
- }
+ memcmp(cp.data, hdev->adv_data, len) == 0)
+ return;
memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
hdev->adv_data_len = len;
cp.length = len;
- err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
-
-unlock:
- hci_dev_unlock(hdev);
- return err;
+ hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
}
/* ---- HCI ioctl helpers ---- */
@@ -719,34 +1133,37 @@ int hci_dev_open(__u16 dev)
goto done;
}
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
- set_bit(HCI_RAW, &hdev->flags);
-
- /* Treat all non BR/EDR controllers as raw devices if
- enable_hs is not set */
- if (hdev->dev_type != HCI_BREDR && !enable_hs)
- set_bit(HCI_RAW, &hdev->flags);
-
if (hdev->open(hdev)) {
ret = -EIO;
goto done;
}
- if (!test_bit(HCI_RAW, &hdev->flags)) {
- atomic_set(&hdev->cmd_cnt, 1);
- set_bit(HCI_INIT, &hdev->flags);
- hdev->init_last_cmd = 0;
+ atomic_set(&hdev->cmd_cnt, 1);
+ set_bit(HCI_INIT, &hdev->flags);
- ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT);
+ if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags))
+ ret = hdev->setup(hdev);
- clear_bit(HCI_INIT, &hdev->flags);
+ if (!ret) {
+ /* Treat all non BR/EDR controllers as raw devices if
+ * enable_hs is not set.
+ */
+ if (hdev->dev_type != HCI_BREDR && !enable_hs)
+ set_bit(HCI_RAW, &hdev->flags);
+
+ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ set_bit(HCI_RAW, &hdev->flags);
+
+ if (!test_bit(HCI_RAW, &hdev->flags))
+ ret = __hci_init(hdev);
}
+ clear_bit(HCI_INIT, &hdev->flags);
+
if (!ret) {
hci_dev_hold(hdev);
set_bit(HCI_UP, &hdev->flags);
hci_notify(hdev, HCI_DEV_UP);
- hci_update_ad(hdev);
if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
@@ -828,7 +1245,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
if (!test_bit(HCI_RAW, &hdev->flags) &&
test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
set_bit(HCI_INIT, &hdev->flags);
- __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
+ __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
clear_bit(HCI_INIT, &hdev->flags);
}
@@ -847,10 +1264,17 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hdev->sent_cmd = NULL;
}
+ kfree_skb(hdev->recv_evt);
+ hdev->recv_evt = NULL;
+
/* After this point our queues are empty
* and no tasks are scheduled. */
hdev->close(hdev);
+ /* Clear flags */
+ hdev->flags = 0;
+ hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
+
if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
@@ -858,9 +1282,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
- /* Clear flags */
- hdev->flags = 0;
-
/* Controller radio is available but is currently powered down */
hdev->amp_status = 0;
@@ -921,7 +1342,7 @@ int hci_dev_reset(__u16 dev)
hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
if (!test_bit(HCI_RAW, &hdev->flags))
- ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
+ ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
done:
hci_req_unlock(hdev);
@@ -960,8 +1381,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
- err = hci_request(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETENCRYPT:
@@ -972,24 +1393,24 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
- err = hci_request(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
if (err)
break;
}
- err = hci_request(hdev, hci_encrypt_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETSCAN:
- err = hci_request(hdev, hci_scan_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETLINKPOL:
- err = hci_request(hdev, hci_linkpol_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETLINKMODE:
@@ -1139,11 +1560,15 @@ static const struct rfkill_ops hci_rfkill_ops = {
static void hci_power_on(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, power_on);
+ int err;
BT_DBG("%s", hdev->name);
- if (hci_dev_open(hdev->id) < 0)
+ err = hci_dev_open(hdev->id);
+ if (err < 0) {
+ mgmt_set_powered_failed(hdev, err);
return;
+ }
if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
@@ -1566,7 +1991,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
return mgmt_device_unblocked(hdev, bdaddr, type);
}
-static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt)
+static void le_scan_param_req(struct hci_request *req, unsigned long opt)
{
struct le_scan_params *param = (struct le_scan_params *) opt;
struct hci_cp_le_set_scan_param cp;
@@ -1576,18 +2001,18 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt)
cp.interval = cpu_to_le16(param->interval);
cp.window = cpu_to_le16(param->window);
- hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);
}
-static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt)
+static void le_scan_enable_req(struct hci_request *req, unsigned long opt)
{
struct hci_cp_le_set_scan_enable cp;
memset(&cp, 0, sizeof(cp));
- cp.enable = 1;
- cp.filter_dup = 1;
+ cp.enable = LE_SCAN_ENABLE;
+ cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
- hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
}
static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
@@ -1608,10 +2033,10 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
hci_req_lock(hdev);
- err = __hci_request(hdev, le_scan_param_req, (unsigned long) &param,
- timeo);
+ err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param,
+ timeo);
if (!err)
- err = __hci_request(hdev, le_scan_enable_req, 0, timeo);
+ err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo);
hci_req_unlock(hdev);
@@ -1619,7 +2044,7 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
return err;
queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
- msecs_to_jiffies(timeout));
+ timeout);
return 0;
}
@@ -1729,7 +2154,6 @@ struct hci_dev *hci_alloc_dev(void)
INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off);
INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
- skb_queue_head_init(&hdev->driver_init);
skb_queue_head_init(&hdev->rx_q);
skb_queue_head_init(&hdev->cmd_q);
skb_queue_head_init(&hdev->raw_q);
@@ -1748,8 +2172,6 @@ EXPORT_SYMBOL(hci_alloc_dev);
/* Free HCI device */
void hci_free_dev(struct hci_dev *hdev)
{
- skb_queue_purge(&hdev->driver_init);
-
/* will free via device release */
put_device(&hdev->dev);
}
@@ -1789,16 +2211,15 @@ int hci_register_dev(struct hci_dev *hdev)
list_add(&hdev->list, &hci_dev_list);
write_unlock(&hci_dev_list_lock);
- hdev->workqueue = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1);
+ hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
+ WQ_MEM_RECLAIM, 1, hdev->name);
if (!hdev->workqueue) {
error = -ENOMEM;
goto err;
}
- hdev->req_workqueue = alloc_workqueue(hdev->name,
- WQ_HIGHPRI | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1);
+ hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
+ WQ_MEM_RECLAIM, 1, hdev->name);
if (!hdev->req_workqueue) {
destroy_workqueue(hdev->workqueue);
error = -ENOMEM;
@@ -2160,20 +2581,55 @@ static int hci_send_frame(struct sk_buff *skb)
return hdev->send(skb);
}
-/* Send HCI command */
-int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
+void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
+{
+ skb_queue_head_init(&req->cmd_q);
+ req->hdev = hdev;
+ req->err = 0;
+}
+
+int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ BT_DBG("length %u", skb_queue_len(&req->cmd_q));
+
+ /* If an error occured during request building, remove all HCI
+ * commands queued on the HCI request queue.
+ */
+ if (req->err) {
+ skb_queue_purge(&req->cmd_q);
+ return req->err;
+ }
+
+ /* Do not allow empty requests */
+ if (skb_queue_empty(&req->cmd_q))
+ return -ENODATA;
+
+ skb = skb_peek_tail(&req->cmd_q);
+ bt_cb(skb)->req.complete = complete;
+
+ spin_lock_irqsave(&hdev->cmd_q.lock, flags);
+ skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
+ spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
+
+ queue_work(hdev->workqueue, &hdev->cmd_work);
+
+ return 0;
+}
+
+static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode,
+ u32 plen, const void *param)
{
int len = HCI_COMMAND_HDR_SIZE + plen;
struct hci_command_hdr *hdr;
struct sk_buff *skb;
- BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
-
skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (!skb) {
- BT_ERR("%s no memory for command", hdev->name);
- return -ENOMEM;
- }
+ if (!skb)
+ return NULL;
hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
hdr->opcode = cpu_to_le16(opcode);
@@ -2187,8 +2643,27 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
skb->dev = (void *) hdev;
- if (test_bit(HCI_INIT, &hdev->flags))
- hdev->init_last_cmd = opcode;
+ return skb;
+}
+
+/* Send HCI command */
+int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
+ const void *param)
+{
+ struct sk_buff *skb;
+
+ BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
+
+ skb = hci_prepare_cmd(hdev, opcode, plen, param);
+ if (!skb) {
+ BT_ERR("%s no memory for command", hdev->name);
+ return -ENOMEM;
+ }
+
+ /* Stand-alone HCI commands must be flaged as
+ * single-command requests.
+ */
+ bt_cb(skb)->req.start = true;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -2196,6 +2671,43 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
return 0;
}
+/* Queue a command to an asynchronous HCI request */
+void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
+ const void *param, u8 event)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct sk_buff *skb;
+
+ BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
+
+ /* If an error occured during request building, there is no point in
+ * queueing the HCI command. We can simply return.
+ */
+ if (req->err)
+ return;
+
+ skb = hci_prepare_cmd(hdev, opcode, plen, param);
+ if (!skb) {
+ BT_ERR("%s no memory for command (opcode 0x%4.4x)",
+ hdev->name, opcode);
+ req->err = -ENOMEM;
+ return;
+ }
+
+ if (skb_queue_empty(&req->cmd_q))
+ bt_cb(skb)->req.start = true;
+
+ bt_cb(skb)->req.event = event;
+
+ skb_queue_tail(&req->cmd_q, skb);
+}
+
+void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
+ const void *param)
+{
+ hci_req_add_ev(req, opcode, plen, param, 0);
+}
+
/* Get data from the previously sent command */
void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
{
@@ -2398,7 +2910,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
if (c->type == type && c->sent) {
BT_ERR("%s killing stalled connection %pMR",
hdev->name, &c->dst);
- hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM);
+ hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
}
}
@@ -2860,6 +3372,97 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb);
}
+static bool hci_req_is_complete(struct hci_dev *hdev)
+{
+ struct sk_buff *skb;
+
+ skb = skb_peek(&hdev->cmd_q);
+ if (!skb)
+ return true;
+
+ return bt_cb(skb)->req.start;
+}
+
+static void hci_resend_last(struct hci_dev *hdev)
+{
+ struct hci_command_hdr *sent;
+ struct sk_buff *skb;
+ u16 opcode;
+
+ if (!hdev->sent_cmd)
+ return;
+
+ sent = (void *) hdev->sent_cmd->data;
+ opcode = __le16_to_cpu(sent->opcode);
+ if (opcode == HCI_OP_RESET)
+ return;
+
+ skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ skb_queue_head(&hdev->cmd_q, skb);
+ queue_work(hdev->workqueue, &hdev->cmd_work);
+}
+
+void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
+{
+ hci_req_complete_t req_complete = NULL;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ BT_DBG("opcode 0x%04x status 0x%02x", opcode, status);
+
+ /* If the completed command doesn't match the last one that was
+ * sent we need to do special handling of it.
+ */
+ if (!hci_sent_cmd_data(hdev, opcode)) {
+ /* Some CSR based controllers generate a spontaneous
+ * reset complete event during init and any pending
+ * command will never be completed. In such a case we
+ * need to resend whatever was the last sent
+ * command.
+ */
+ if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET)
+ hci_resend_last(hdev);
+
+ return;
+ }
+
+ /* If the command succeeded and there's still more commands in
+ * this request the request is not yet complete.
+ */
+ if (!status && !hci_req_is_complete(hdev))
+ return;
+
+ /* If this was the last command in a request the complete
+ * callback would be found in hdev->sent_cmd instead of the
+ * command queue (hdev->cmd_q).
+ */
+ if (hdev->sent_cmd) {
+ req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+ if (req_complete)
+ goto call_complete;
+ }
+
+ /* Remove all pending commands belonging to this request */
+ spin_lock_irqsave(&hdev->cmd_q.lock, flags);
+ while ((skb = __skb_dequeue(&hdev->cmd_q))) {
+ if (bt_cb(skb)->req.start) {
+ __skb_queue_head(&hdev->cmd_q, skb);
+ break;
+ }
+
+ req_complete = bt_cb(skb)->req.complete;
+ kfree_skb(skb);
+ }
+ spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
+
+call_complete:
+ if (req_complete)
+ req_complete(hdev, status);
+}
+
static void hci_rx_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 477726a63512..b93cd2eb5d58 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -48,13 +48,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
}
clear_bit(HCI_INQUIRY, &hdev->flags);
+ smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+ wake_up_bit(&hdev->flags, HCI_INQUIRY);
hci_dev_lock(hdev);
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
hci_dev_unlock(hdev);
- hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status);
-
hci_conn_check_pending(hdev);
}
@@ -183,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev,
if (!status)
hdev->link_policy = get_unaligned_le16(sent);
-
- hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status);
}
static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
@@ -195,11 +193,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_RESET, &hdev->flags);
- hci_req_complete(hdev, HCI_OP_RESET, status);
-
/* Reset all non-persistent flags */
- hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) |
- BIT(HCI_PERIODIC_INQ));
+ hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
hdev->discovery.state = DISCOVERY_STOPPED;
hdev->inq_tx_power = HCI_TX_POWER_INVALID;
@@ -228,11 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH);
hci_dev_unlock(hdev);
-
- if (!status && !test_bit(HCI_INIT, &hdev->flags))
- hci_update_ad(hdev);
-
- hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status);
}
static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
@@ -270,8 +260,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
if (test_bit(HCI_MGMT, &hdev->dev_flags))
mgmt_auth_enable_complete(hdev, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status);
}
static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -293,8 +281,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
else
clear_bit(HCI_ENCRYPT, &hdev->flags);
}
-
- hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status);
}
static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
@@ -343,7 +329,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
done:
hci_dev_unlock(hdev);
- hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
}
static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
@@ -435,15 +420,6 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev,
hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
}
-static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status);
-}
-
static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
@@ -457,9 +433,9 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
if (!status) {
if (sent->mode)
- hdev->host_features[0] |= LMP_HOST_SSP;
+ hdev->features[1][0] |= LMP_HOST_SSP;
else
- hdev->host_features[0] &= ~LMP_HOST_SSP;
+ hdev->features[1][0] &= ~LMP_HOST_SSP;
}
if (test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -472,211 +448,6 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
}
}
-static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
-{
- if (lmp_ext_inq_capable(hdev))
- return 2;
-
- if (lmp_inq_rssi_capable(hdev))
- return 1;
-
- if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
- hdev->lmp_subver == 0x0757)
- return 1;
-
- if (hdev->manufacturer == 15) {
- if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
- return 1;
- if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
- return 1;
- if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
- return 1;
- }
-
- if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
- hdev->lmp_subver == 0x1805)
- return 1;
-
- return 0;
-}
-
-static void hci_setup_inquiry_mode(struct hci_dev *hdev)
-{
- u8 mode;
-
- mode = hci_get_inquiry_mode(hdev);
-
- hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
-}
-
-static void hci_setup_event_mask(struct hci_dev *hdev)
-{
- /* The second byte is 0xff instead of 0x9f (two reserved bits
- * disabled) since a Broadcom 1.2 dongle doesn't respond to the
- * command otherwise */
- u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
-
- /* CSR 1.1 dongles does not accept any bitfield so don't try to set
- * any event mask for pre 1.2 devices */
- if (hdev->hci_ver < BLUETOOTH_VER_1_2)
- return;
-
- if (lmp_bredr_capable(hdev)) {
- events[4] |= 0x01; /* Flow Specification Complete */
- events[4] |= 0x02; /* Inquiry Result with RSSI */
- events[4] |= 0x04; /* Read Remote Extended Features Complete */
- events[5] |= 0x08; /* Synchronous Connection Complete */
- events[5] |= 0x10; /* Synchronous Connection Changed */
- }
-
- if (lmp_inq_rssi_capable(hdev))
- events[4] |= 0x02; /* Inquiry Result with RSSI */
-
- if (lmp_sniffsubr_capable(hdev))
- events[5] |= 0x20; /* Sniff Subrating */
-
- if (lmp_pause_enc_capable(hdev))
- events[5] |= 0x80; /* Encryption Key Refresh Complete */
-
- if (lmp_ext_inq_capable(hdev))
- events[5] |= 0x40; /* Extended Inquiry Result */
-
- if (lmp_no_flush_capable(hdev))
- events[7] |= 0x01; /* Enhanced Flush Complete */
-
- if (lmp_lsto_capable(hdev))
- events[6] |= 0x80; /* Link Supervision Timeout Changed */
-
- if (lmp_ssp_capable(hdev)) {
- events[6] |= 0x01; /* IO Capability Request */
- events[6] |= 0x02; /* IO Capability Response */
- events[6] |= 0x04; /* User Confirmation Request */
- events[6] |= 0x08; /* User Passkey Request */
- events[6] |= 0x10; /* Remote OOB Data Request */
- events[6] |= 0x20; /* Simple Pairing Complete */
- events[7] |= 0x04; /* User Passkey Notification */
- events[7] |= 0x08; /* Keypress Notification */
- events[7] |= 0x10; /* Remote Host Supported
- * Features Notification */
- }
-
- if (lmp_le_capable(hdev))
- events[7] |= 0x20; /* LE Meta-Event */
-
- hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
-
- if (lmp_le_capable(hdev)) {
- memset(events, 0, sizeof(events));
- events[0] = 0x1f;
- hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK,
- sizeof(events), events);
- }
-}
-
-static void bredr_setup(struct hci_dev *hdev)
-{
- struct hci_cp_delete_stored_link_key cp;
- __le16 param;
- __u8 flt_type;
-
- /* Read Buffer Size (ACL mtu, max pkt, etc.) */
- hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
-
- /* Read Class of Device */
- hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
-
- /* Read Local Name */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL);
-
- /* Read Voice Setting */
- hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL);
-
- /* Clear Event Filters */
- flt_type = HCI_FLT_CLEAR_ALL;
- hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
-
- /* Connection accept timeout ~20 secs */
- param = __constant_cpu_to_le16(0x7d00);
- hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
-
- bacpy(&cp.bdaddr, BDADDR_ANY);
- cp.delete_all = 1;
- hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
-}
-
-static void le_setup(struct hci_dev *hdev)
-{
- /* Read LE Buffer Size */
- hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
-
- /* Read LE Local Supported Features */
- hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
-
- /* Read LE Advertising Channel TX Power */
- hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
-
- /* Read LE White List Size */
- hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
-
- /* Read LE Supported States */
- hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
-}
-
-static void hci_setup(struct hci_dev *hdev)
-{
- if (hdev->dev_type != HCI_BREDR)
- return;
-
- /* Read BD Address */
- hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL);
-
- if (lmp_bredr_capable(hdev))
- bredr_setup(hdev);
-
- if (lmp_le_capable(hdev))
- le_setup(hdev);
-
- hci_setup_event_mask(hdev);
-
- if (hdev->hci_ver > BLUETOOTH_VER_1_1)
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
-
- if (lmp_ssp_capable(hdev)) {
- if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
- u8 mode = 0x01;
- hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE,
- sizeof(mode), &mode);
- } else {
- struct hci_cp_write_eir cp;
-
- memset(hdev->eir, 0, sizeof(hdev->eir));
- memset(&cp, 0, sizeof(cp));
-
- hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
- }
- }
-
- if (lmp_inq_rssi_capable(hdev))
- hci_setup_inquiry_mode(hdev);
-
- if (lmp_inq_tx_pwr_capable(hdev))
- hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
-
- if (lmp_ext_feat_capable(hdev)) {
- struct hci_cp_read_local_ext_features cp;
-
- cp.page = 0x01;
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp),
- &cp);
- }
-
- if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
- u8 enable = 1;
- hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
- &enable);
- }
-}
-
static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -684,7 +455,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
if (rp->status)
- goto done;
+ return;
hdev->hci_ver = rp->hci_ver;
hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
@@ -694,30 +465,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name,
hdev->manufacturer, hdev->hci_ver, hdev->hci_rev);
-
- if (test_bit(HCI_INIT, &hdev->flags))
- hci_setup(hdev);
-
-done:
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status);
-}
-
-static void hci_setup_link_policy(struct hci_dev *hdev)
-{
- struct hci_cp_write_def_link_policy cp;
- u16 link_policy = 0;
-
- if (lmp_rswitch_capable(hdev))
- link_policy |= HCI_LP_RSWITCH;
- if (lmp_hold_capable(hdev))
- link_policy |= HCI_LP_HOLD;
- if (lmp_sniff_capable(hdev))
- link_policy |= HCI_LP_SNIFF;
- if (lmp_park_capable(hdev))
- link_policy |= HCI_LP_PARK;
-
- cp.policy = cpu_to_le16(link_policy);
- hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
}
static void hci_cc_read_local_commands(struct hci_dev *hdev,
@@ -727,16 +474,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (rp->status)
- goto done;
-
- memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
-
- if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10))
- hci_setup_link_policy(hdev);
-
-done:
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status);
+ if (!rp->status)
+ memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
}
static void hci_cc_read_local_features(struct hci_dev *hdev,
@@ -754,18 +493,18 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
/* Adjust default settings according to features
* supported by device. */
- if (hdev->features[0] & LMP_3SLOT)
+ if (hdev->features[0][0] & LMP_3SLOT)
hdev->pkt_type |= (HCI_DM3 | HCI_DH3);
- if (hdev->features[0] & LMP_5SLOT)
+ if (hdev->features[0][0] & LMP_5SLOT)
hdev->pkt_type |= (HCI_DM5 | HCI_DH5);
- if (hdev->features[1] & LMP_HV2) {
+ if (hdev->features[0][1] & LMP_HV2) {
hdev->pkt_type |= (HCI_HV2);
hdev->esco_type |= (ESCO_HV2);
}
- if (hdev->features[1] & LMP_HV3) {
+ if (hdev->features[0][1] & LMP_HV3) {
hdev->pkt_type |= (HCI_HV3);
hdev->esco_type |= (ESCO_HV3);
}
@@ -773,42 +512,26 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
if (lmp_esco_capable(hdev))
hdev->esco_type |= (ESCO_EV3);
- if (hdev->features[4] & LMP_EV4)
+ if (hdev->features[0][4] & LMP_EV4)
hdev->esco_type |= (ESCO_EV4);
- if (hdev->features[4] & LMP_EV5)
+ if (hdev->features[0][4] & LMP_EV5)
hdev->esco_type |= (ESCO_EV5);
- if (hdev->features[5] & LMP_EDR_ESCO_2M)
+ if (hdev->features[0][5] & LMP_EDR_ESCO_2M)
hdev->esco_type |= (ESCO_2EV3);
- if (hdev->features[5] & LMP_EDR_ESCO_3M)
+ if (hdev->features[0][5] & LMP_EDR_ESCO_3M)
hdev->esco_type |= (ESCO_3EV3);
- if (hdev->features[5] & LMP_EDR_3S_ESCO)
+ if (hdev->features[0][5] & LMP_EDR_3S_ESCO)
hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5);
BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name,
- hdev->features[0], hdev->features[1],
- hdev->features[2], hdev->features[3],
- hdev->features[4], hdev->features[5],
- hdev->features[6], hdev->features[7]);
-}
-
-static void hci_set_le_support(struct hci_dev *hdev)
-{
- struct hci_cp_write_le_host_supported cp;
-
- memset(&cp, 0, sizeof(cp));
-
- if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
- cp.le = 1;
- cp.simul = lmp_le_br_capable(hdev);
- }
-
- if (cp.le != lmp_host_le_capable(hdev))
- hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
- &cp);
+ hdev->features[0][0], hdev->features[0][1],
+ hdev->features[0][2], hdev->features[0][3],
+ hdev->features[0][4], hdev->features[0][5],
+ hdev->features[0][6], hdev->features[0][7]);
}
static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
@@ -819,22 +542,12 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
if (rp->status)
- goto done;
-
- switch (rp->page) {
- case 0:
- memcpy(hdev->features, rp->features, 8);
- break;
- case 1:
- memcpy(hdev->host_features, rp->features, 8);
- break;
- }
+ return;
- if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev))
- hci_set_le_support(hdev);
+ hdev->max_page = rp->max_page;
-done:
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status);
+ if (rp->page < HCI_MAX_PAGES)
+ memcpy(hdev->features[rp->page], rp->features, 8);
}
static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
@@ -844,12 +557,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (rp->status)
- return;
-
- hdev->flow_ctl_mode = rp->mode;
-
- hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status);
+ if (!rp->status)
+ hdev->flow_ctl_mode = rp->mode;
}
static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
@@ -886,8 +595,65 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
if (!rp->status)
bacpy(&hdev->bdaddr, &rp->bdaddr);
+}
+
+static void hci_cc_read_page_scan_activity(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_rp_read_page_scan_activity *rp = (void *) skb->data;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status);
+ if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) {
+ hdev->page_scan_interval = __le16_to_cpu(rp->interval);
+ hdev->page_scan_window = __le16_to_cpu(rp->window);
+ }
+}
+
+static void hci_cc_write_page_scan_activity(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ u8 status = *((u8 *) skb->data);
+ struct hci_cp_write_page_scan_activity *sent;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY);
+ if (!sent)
+ return;
+
+ hdev->page_scan_interval = __le16_to_cpu(sent->interval);
+ hdev->page_scan_window = __le16_to_cpu(sent->window);
+}
+
+static void hci_cc_read_page_scan_type(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_rp_read_page_scan_type *rp = (void *) skb->data;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (test_bit(HCI_INIT, &hdev->flags) && !rp->status)
+ hdev->page_scan_type = rp->type;
+}
+
+static void hci_cc_write_page_scan_type(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ u8 status = *((u8 *) skb->data);
+ u8 *type;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE);
+ if (type)
+ hdev->page_scan_type = *type;
}
static void hci_cc_read_data_block_size(struct hci_dev *hdev,
@@ -908,17 +674,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev,
BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,
hdev->block_cnt, hdev->block_len);
-
- hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status);
-}
-
-static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
}
static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
@@ -942,8 +697,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status);
-
a2mp_rsp:
a2mp_send_getinfo_rsp(hdev);
}
@@ -985,35 +738,6 @@ a2mp_rsp:
a2mp_send_create_phy_link_req(hdev, rp->status);
}
-static void hci_cc_delete_stored_link_key(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status);
-}
-
-static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status);
-}
-
-static void hci_cc_write_inquiry_mode(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status);
-}
-
static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -1023,17 +747,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
if (!rp->status)
hdev->inq_tx_power = rp->tx_power;
-
- hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status);
-}
-
-static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);
}
static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1095,8 +808,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,
hdev->le_cnt = hdev->le_pkts;
BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
-
- hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);
}
static void hci_cc_le_read_local_features(struct hci_dev *hdev,
@@ -1108,8 +819,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev,
if (!rp->status)
memcpy(hdev->le_features, rp->features, 8);
-
- hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status);
}
static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
@@ -1119,22 +828,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status) {
+ if (!rp->status)
hdev->adv_tx_power = rp->tx_power;
- if (!test_bit(HCI_INIT, &hdev->flags))
- hci_update_ad(hdev);
- }
-
- hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status);
-}
-
-static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status);
}
static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1231,12 +926,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags);
}
- hci_dev_unlock(hdev);
+ if (!test_bit(HCI_INIT, &hdev->flags)) {
+ struct hci_request req;
- if (!test_bit(HCI_INIT, &hdev->flags))
- hci_update_ad(hdev);
+ hci_req_init(&req, hdev);
+ hci_update_ad(&req);
+ hci_req_run(&req, NULL);
+ }
- hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status);
+ hci_dev_unlock(hdev);
}
static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1245,8 +943,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status);
-
if (status) {
hci_dev_lock(hdev);
mgmt_start_discovery_failed(hdev, status);
@@ -1268,9 +964,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
return;
switch (cp->enable) {
- case LE_SCANNING_ENABLED:
- hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status);
-
+ case LE_SCAN_ENABLE:
if (status) {
hci_dev_lock(hdev);
mgmt_start_discovery_failed(hdev, status);
@@ -1285,7 +979,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
hci_dev_unlock(hdev);
break;
- case LE_SCANNING_DISABLED:
+ case LE_SCAN_DISABLE:
if (status) {
hci_dev_lock(hdev);
mgmt_stop_discovery_failed(hdev, status);
@@ -1321,32 +1015,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
if (!rp->status)
hdev->le_white_list_size = rp->size;
-
- hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status);
-}
-
-static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb)
-{
- struct hci_rp_le_ltk_reply *rp = (void *) skb->data;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
- if (rp->status)
- return;
-
- hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status);
-}
-
-static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
-{
- struct hci_rp_le_ltk_neg_reply *rp = (void *) skb->data;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
- if (rp->status)
- return;
-
- hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status);
}
static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
@@ -1358,8 +1026,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
if (!rp->status)
memcpy(hdev->le_states, rp->le_states, 8);
-
- hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status);
}
static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
@@ -1376,21 +1042,19 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
if (!status) {
if (sent->le)
- hdev->host_features[0] |= LMP_HOST_LE;
+ hdev->features[1][0] |= LMP_HOST_LE;
else
- hdev->host_features[0] &= ~LMP_HOST_LE;
+ hdev->features[1][0] &= ~LMP_HOST_LE;
if (sent->simul)
- hdev->host_features[0] |= LMP_HOST_LE_BREDR;
+ hdev->features[1][0] |= LMP_HOST_LE_BREDR;
else
- hdev->host_features[0] &= ~LMP_HOST_LE_BREDR;
+ hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
}
if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
!test_bit(HCI_INIT, &hdev->flags))
mgmt_le_enable_complete(hdev, sent->le, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status);
}
static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
@@ -1412,7 +1076,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
if (status) {
- hci_req_complete(hdev, HCI_OP_INQUIRY, status);
hci_conn_check_pending(hdev);
hci_dev_lock(hdev);
if (test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -1523,7 +1186,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status)
if (conn) {
if (conn->state == BT_CONFIG) {
hci_proto_connect_cfm(conn, status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
}
@@ -1550,7 +1213,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
if (conn) {
if (conn->state == BT_CONFIG) {
hci_proto_connect_cfm(conn, status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
}
@@ -1712,7 +1375,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)
if (conn) {
if (conn->state == BT_CONFIG) {
hci_proto_connect_cfm(conn, status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
}
@@ -1739,7 +1402,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status)
if (conn) {
if (conn->state == BT_CONFIG) {
hci_proto_connect_cfm(conn, status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
}
@@ -1884,11 +1547,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
}
}
-static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
-{
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-}
-
static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status)
{
struct hci_cp_create_phy_link *cp;
@@ -1930,11 +1588,6 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status)
amp_write_remote_assoc(hdev, cp->phy_handle);
}
-static void hci_cs_create_logical_link(struct hci_dev *hdev, u8 status)
-{
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-}
-
static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
@@ -1943,13 +1596,14 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- hci_req_complete(hdev, HCI_OP_INQUIRY, status);
-
hci_conn_check_pending(hdev);
if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
return;
+ smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+ wake_up_bit(&hdev->flags, HCI_INQUIRY);
+
if (!test_bit(HCI_MGMT, &hdev->dev_flags))
return;
@@ -2048,7 +1702,6 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
} else
conn->state = BT_CONNECTED;
- hci_conn_hold_device(conn);
hci_conn_add_sysfs(conn);
if (test_bit(HCI_AUTH, &hdev->flags))
@@ -2095,42 +1748,6 @@ unlock:
hci_conn_check_pending(hdev);
}
-void hci_conn_accept(struct hci_conn *conn, int mask)
-{
- struct hci_dev *hdev = conn->hdev;
-
- BT_DBG("conn %p", conn);
-
- conn->state = BT_CONFIG;
-
- if (!lmp_esco_capable(hdev)) {
- struct hci_cp_accept_conn_req cp;
-
- bacpy(&cp.bdaddr, &conn->dst);
-
- if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
- cp.role = 0x00; /* Become master */
- else
- cp.role = 0x01; /* Remain slave */
-
- hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
- } else /* lmp_esco_capable(hdev)) */ {
- struct hci_cp_accept_sync_conn_req cp;
-
- bacpy(&cp.bdaddr, &conn->dst);
- cp.pkt_type = cpu_to_le16(conn->pkt_type);
-
- cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.max_latency = __constant_cpu_to_le16(0xffff);
- cp.content_format = cpu_to_le16(hdev->voice_setting);
- cp.retrans_effort = 0xff;
-
- hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
- sizeof(cp), &cp);
- }
-}
-
static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_conn_request *ev = (void *) skb->data;
@@ -2202,7 +1819,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
} else {
conn->state = BT_CONNECT2;
hci_proto_connect_cfm(conn, 0);
- hci_conn_put(conn);
}
} else {
/* Connection rejected */
@@ -2309,14 +1925,14 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
} else {
conn->state = BT_CONNECTED;
hci_proto_connect_cfm(conn, ev->status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
} else {
hci_auth_cfm(conn, ev->status);
hci_conn_hold(conn);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) {
@@ -2399,8 +2015,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
if (ev->status && conn->state == BT_CONNECTED) {
- hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE);
- hci_conn_put(conn);
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_conn_drop(conn);
goto unlock;
}
@@ -2409,7 +2025,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->state = BT_CONNECTED;
hci_proto_connect_cfm(conn, ev->status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
} else
hci_encrypt_cfm(conn, ev->status, ev->encrypt);
}
@@ -2456,7 +2072,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
goto unlock;
if (!ev->status)
- memcpy(conn->features, ev->features, 8);
+ memcpy(conn->features[0], ev->features, 8);
if (conn->state != BT_CONFIG)
goto unlock;
@@ -2484,27 +2100,17 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
hci_proto_connect_cfm(conn, ev->status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
unlock:
hci_dev_unlock(hdev);
}
-static void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- BT_DBG("%s", hdev->name);
-}
-
-static void hci_qos_setup_complete_evt(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- BT_DBG("%s", hdev->name);
-}
-
static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_cmd_complete *ev = (void *) skb->data;
+ u8 status = skb->data[sizeof(*ev)];
__u16 opcode;
skb_pull(skb, sizeof(*ev));
@@ -2588,10 +2194,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_write_voice_setting(hdev, skb);
break;
- case HCI_OP_HOST_BUFFER_SIZE:
- hci_cc_host_buffer_size(hdev, skb);
- break;
-
case HCI_OP_WRITE_SSP_MODE:
hci_cc_write_ssp_mode(hdev, skb);
break;
@@ -2620,46 +2222,42 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_read_bd_addr(hdev, skb);
break;
- case HCI_OP_READ_DATA_BLOCK_SIZE:
- hci_cc_read_data_block_size(hdev, skb);
+ case HCI_OP_READ_PAGE_SCAN_ACTIVITY:
+ hci_cc_read_page_scan_activity(hdev, skb);
break;
- case HCI_OP_WRITE_CA_TIMEOUT:
- hci_cc_write_ca_timeout(hdev, skb);
+ case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY:
+ hci_cc_write_page_scan_activity(hdev, skb);
break;
- case HCI_OP_READ_FLOW_CONTROL_MODE:
- hci_cc_read_flow_control_mode(hdev, skb);
+ case HCI_OP_READ_PAGE_SCAN_TYPE:
+ hci_cc_read_page_scan_type(hdev, skb);
break;
- case HCI_OP_READ_LOCAL_AMP_INFO:
- hci_cc_read_local_amp_info(hdev, skb);
+ case HCI_OP_WRITE_PAGE_SCAN_TYPE:
+ hci_cc_write_page_scan_type(hdev, skb);
break;
- case HCI_OP_READ_LOCAL_AMP_ASSOC:
- hci_cc_read_local_amp_assoc(hdev, skb);
+ case HCI_OP_READ_DATA_BLOCK_SIZE:
+ hci_cc_read_data_block_size(hdev, skb);
break;
- case HCI_OP_DELETE_STORED_LINK_KEY:
- hci_cc_delete_stored_link_key(hdev, skb);
+ case HCI_OP_READ_FLOW_CONTROL_MODE:
+ hci_cc_read_flow_control_mode(hdev, skb);
break;
- case HCI_OP_SET_EVENT_MASK:
- hci_cc_set_event_mask(hdev, skb);
+ case HCI_OP_READ_LOCAL_AMP_INFO:
+ hci_cc_read_local_amp_info(hdev, skb);
break;
- case HCI_OP_WRITE_INQUIRY_MODE:
- hci_cc_write_inquiry_mode(hdev, skb);
+ case HCI_OP_READ_LOCAL_AMP_ASSOC:
+ hci_cc_read_local_amp_assoc(hdev, skb);
break;
case HCI_OP_READ_INQ_RSP_TX_POWER:
hci_cc_read_inq_rsp_tx_power(hdev, skb);
break;
- case HCI_OP_SET_EVENT_FLT:
- hci_cc_set_event_flt(hdev, skb);
- break;
-
case HCI_OP_PIN_CODE_REPLY:
hci_cc_pin_code_reply(hdev, skb);
break;
@@ -2684,10 +2282,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_le_read_adv_tx_power(hdev, skb);
break;
- case HCI_OP_LE_SET_EVENT_MASK:
- hci_cc_le_set_event_mask(hdev, skb);
- break;
-
case HCI_OP_USER_CONFIRM_REPLY:
hci_cc_user_confirm_reply(hdev, skb);
break;
@@ -2720,14 +2314,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_le_read_white_list_size(hdev, skb);
break;
- case HCI_OP_LE_LTK_REPLY:
- hci_cc_le_ltk_reply(hdev, skb);
- break;
-
- case HCI_OP_LE_LTK_NEG_REPLY:
- hci_cc_le_ltk_neg_reply(hdev, skb);
- break;
-
case HCI_OP_LE_READ_SUPPORTED_STATES:
hci_cc_le_read_supported_states(hdev, skb);
break;
@@ -2745,9 +2331,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
}
- if (ev->opcode != HCI_OP_NOP)
+ if (opcode != HCI_OP_NOP)
del_timer(&hdev->cmd_timer);
+ hci_req_cmd_complete(hdev, opcode, status);
+
if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
atomic_set(&hdev->cmd_cnt, 1);
if (!skb_queue_empty(&hdev->cmd_q))
@@ -2817,10 +2405,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cs_le_create_conn(hdev, ev->status);
break;
- case HCI_OP_LE_START_ENC:
- hci_cs_le_start_enc(hdev, ev->status);
- break;
-
case HCI_OP_CREATE_PHY_LINK:
hci_cs_create_phylink(hdev, ev->status);
break;
@@ -2829,18 +2413,18 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cs_accept_phylink(hdev, ev->status);
break;
- case HCI_OP_CREATE_LOGICAL_LINK:
- hci_cs_create_logical_link(hdev, ev->status);
- break;
-
default:
BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
break;
}
- if (ev->opcode != HCI_OP_NOP)
+ if (opcode != HCI_OP_NOP)
del_timer(&hdev->cmd_timer);
+ if (ev->status ||
+ (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
+ hci_req_cmd_complete(hdev, opcode, ev->status);
+
if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
atomic_set(&hdev->cmd_cnt, 1);
if (!skb_queue_empty(&hdev->cmd_q))
@@ -3056,7 +2640,7 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (conn->state == BT_CONNECTED) {
hci_conn_hold(conn);
conn->disc_timeout = HCI_PAIRING_TIMEOUT;
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags))
@@ -3159,7 +2743,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (ev->key_type != HCI_LK_CHANGED_COMBINATION)
conn->key_type = ev->key_type;
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags))
@@ -3300,6 +2884,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
if (!conn)
goto unlock;
+ if (ev->page < HCI_MAX_PAGES)
+ memcpy(conn->features[ev->page], ev->features, 8);
+
if (!ev->status && ev->page == 0x01) {
struct inquiry_entry *ie;
@@ -3307,8 +2894,19 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
if (ie)
ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP);
- if (ev->features[0] & LMP_HOST_SSP)
+ if (ev->features[0] & LMP_HOST_SSP) {
set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+ } else {
+ /* It is mandatory by the Bluetooth specification that
+ * Extended Inquiry Results are only used when Secure
+ * Simple Pairing is enabled, but some devices violate
+ * this.
+ *
+ * To make these devices work, the internal SSP
+ * enabled flag needs to be cleared if the remote host
+ * features do not indicate SSP support */
+ clear_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+ }
}
if (conn->state != BT_CONFIG)
@@ -3328,7 +2926,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
hci_proto_connect_cfm(conn, ev->status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
unlock:
@@ -3362,7 +2960,6 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
- hci_conn_hold_device(conn);
hci_conn_add_sysfs(conn);
break;
@@ -3391,18 +2988,6 @@ unlock:
hci_dev_unlock(hdev);
}
-static void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- BT_DBG("%s", hdev->name);
-}
-
-static void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- struct hci_ev_sniff_subrate *ev = (void *) skb->data;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
-}
-
static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -3472,8 +3057,8 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
if (ev->status && conn->state == BT_CONNECTED) {
- hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE);
- hci_conn_put(conn);
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_conn_drop(conn);
goto unlock;
}
@@ -3482,13 +3067,13 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
conn->state = BT_CONNECTED;
hci_proto_connect_cfm(conn, ev->status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
} else {
hci_auth_cfm(conn, ev->status);
hci_conn_hold(conn);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
- hci_conn_put(conn);
+ hci_conn_drop(conn);
}
unlock:
@@ -3749,7 +3334,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type,
ev->status);
- hci_conn_put(conn);
+ hci_conn_drop(conn);
unlock:
hci_dev_unlock(hdev);
@@ -3760,11 +3345,16 @@ static void hci_remote_host_features_evt(struct hci_dev *hdev,
{
struct hci_ev_remote_host_features *ev = (void *) skb->data;
struct inquiry_entry *ie;
+ struct hci_conn *conn;
BT_DBG("%s", hdev->name);
hci_dev_lock(hdev);
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+ if (conn)
+ memcpy(conn->features[1], ev->features, 8);
+
ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
if (ie)
ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP);
@@ -3837,9 +3427,8 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev,
hci_conn_hold(hcon);
hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
- hci_conn_put(hcon);
+ hci_conn_drop(hcon);
- hci_conn_hold_device(hcon);
hci_conn_add_sysfs(hcon);
amp_physical_cfm(bredr_hcon, hcon);
@@ -3973,7 +3562,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
- hci_conn_hold_device(conn);
hci_conn_add_sysfs(conn);
hci_proto_connect_cfm(conn, ev->status);
@@ -4087,8 +3675,27 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
struct hci_event_hdr *hdr = (void *) skb->data;
__u8 event = hdr->evt;
+ hci_dev_lock(hdev);
+
+ /* Received events are (currently) only needed when a request is
+ * ongoing so avoid unnecessary memory allocation.
+ */
+ if (hdev->req_status == HCI_REQ_PEND) {
+ kfree_skb(hdev->recv_evt);
+ hdev->recv_evt = skb_clone(skb, GFP_KERNEL);
+ }
+
+ hci_dev_unlock(hdev);
+
skb_pull(skb, HCI_EVENT_HDR_SIZE);
+ if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
+ struct hci_command_hdr *hdr = (void *) hdev->sent_cmd->data;
+ u16 opcode = __le16_to_cpu(hdr->opcode);
+
+ hci_req_cmd_complete(hdev, opcode, 0);
+ }
+
switch (event) {
case HCI_EV_INQUIRY_COMPLETE:
hci_inquiry_complete_evt(hdev, skb);
@@ -4130,14 +3737,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_remote_features_evt(hdev, skb);
break;
- case HCI_EV_REMOTE_VERSION:
- hci_remote_version_evt(hdev, skb);
- break;
-
- case HCI_EV_QOS_SETUP_COMPLETE:
- hci_qos_setup_complete_evt(hdev, skb);
- break;
-
case HCI_EV_CMD_COMPLETE:
hci_cmd_complete_evt(hdev, skb);
break;
@@ -4194,14 +3793,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_sync_conn_complete_evt(hdev, skb);
break;
- case HCI_EV_SYNC_CONN_CHANGED:
- hci_sync_conn_changed_evt(hdev, skb);
- break;
-
- case HCI_EV_SNIFF_SUBRATE:
- hci_sniff_subrate_evt(hdev, skb);
- break;
-
case HCI_EV_EXTENDED_INQUIRY_RESULT:
hci_extended_inquiry_result_evt(hdev, skb);
break;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 6a93614f2c49..9bd7d959e384 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -854,6 +854,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
skb_queue_tail(&hdev->raw_q, skb);
queue_work(hdev->workqueue, &hdev->tx_work);
} else {
+ /* Stand-alone HCI commands must be flagged as
+ * single-command requests.
+ */
+ bt_cb(skb)->req.start = true;
+
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
}
@@ -1102,7 +1107,7 @@ int __init hci_sock_init(void)
goto error;
}
- err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL);
+ err = bt_procfs_init(&init_net, "hci", &hci_sk_list, NULL);
if (err < 0) {
BT_ERR("Failed to create HCI proc file");
bt_sock_unregister(BTPROTO_HCI);
@@ -1121,8 +1126,6 @@ error:
void hci_sock_cleanup(void)
{
bt_procfs_cleanup(&init_net, "hci");
- if (bt_sock_unregister(BTPROTO_HCI) < 0)
- BT_ERR("HCI socket unregistration failed");
-
+ bt_sock_unregister(BTPROTO_HCI);
proto_unregister(&hci_sk_proto);
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 23b4e242a31a..7ad6ecf36f20 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -48,10 +48,10 @@ static ssize_t show_link_features(struct device *dev,
struct hci_conn *conn = to_hci_conn(dev);
return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- conn->features[0], conn->features[1],
- conn->features[2], conn->features[3],
- conn->features[4], conn->features[5],
- conn->features[6], conn->features[7]);
+ conn->features[0][0], conn->features[0][1],
+ conn->features[0][2], conn->features[0][3],
+ conn->features[0][4], conn->features[0][5],
+ conn->features[0][6], conn->features[0][7]);
}
#define LINK_ATTR(_name, _mode, _show, _store) \
@@ -146,7 +146,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
}
device_del(&conn->dev);
- put_device(&conn->dev);
hci_dev_put(hdev);
}
@@ -234,10 +233,10 @@ static ssize_t show_features(struct device *dev,
struct hci_dev *hdev = to_hci_dev(dev);
return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- hdev->features[0], hdev->features[1],
- hdev->features[2], hdev->features[3],
- hdev->features[4], hdev->features[5],
- hdev->features[6], hdev->features[7]);
+ hdev->features[0][0], hdev->features[0][1],
+ hdev->features[0][2], hdev->features[0][3],
+ hdev->features[0][4], hdev->features[0][5],
+ hdev->features[0][6], hdev->features[0][7]);
}
static ssize_t show_manufacturer(struct device *dev,
@@ -590,10 +589,8 @@ int __init bt_sysfs_init(void)
bt_debugfs = debugfs_create_dir("bluetooth", NULL);
bt_class = class_create(THIS_MODULE, "bluetooth");
- if (IS_ERR(bt_class))
- return PTR_ERR(bt_class);
- return 0;
+ return PTR_RET(bt_class);
}
void bt_sysfs_cleanup(void)
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index a7352ff3fd1e..46c6a148f0b3 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1,6 +1,7 @@
/*
HIDP implementation for Linux Bluetooth stack (BlueZ).
Copyright (C) 2003-2004 Marcel Holtmann <marcel@holtmann.org>
+ Copyright (C) 2013 David Herrmann <dh.herrmann@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 as
@@ -20,6 +21,7 @@
SOFTWARE IS DISCLAIMED.
*/
+#include <linux/kref.h>
#include <linux/module.h>
#include <linux/file.h>
#include <linux/kthread.h>
@@ -59,39 +61,20 @@ static unsigned char hidp_keycode[256] = {
static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 };
-static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr)
-{
- struct hidp_session *session;
-
- BT_DBG("");
-
- list_for_each_entry(session, &hidp_session_list, list) {
- if (!bacmp(bdaddr, &session->bdaddr))
- return session;
- }
-
- return NULL;
-}
-
-static void __hidp_link_session(struct hidp_session *session)
-{
- list_add(&session->list, &hidp_session_list);
-}
-
-static void __hidp_unlink_session(struct hidp_session *session)
-{
- hci_conn_put_device(session->conn);
-
- list_del(&session->list);
-}
+static int hidp_session_probe(struct l2cap_conn *conn,
+ struct l2cap_user *user);
+static void hidp_session_remove(struct l2cap_conn *conn,
+ struct l2cap_user *user);
+static int hidp_session_thread(void *arg);
+static void hidp_session_terminate(struct hidp_session *s);
-static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci)
+static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci)
{
memset(ci, 0, sizeof(*ci));
bacpy(&ci->bdaddr, &session->bdaddr);
ci->flags = session->flags;
- ci->state = session->state;
+ ci->state = BT_CONNECTED;
ci->vendor = 0x0000;
ci->product = 0x0000;
@@ -115,58 +98,80 @@ static void __hidp_copy_session(struct hidp_session *session, struct hidp_connin
}
}
-static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev,
- unsigned int type, unsigned int code, int value)
+/* assemble skb, queue message on @transmit and wake up the session thread */
+static int hidp_send_message(struct hidp_session *session, struct socket *sock,
+ struct sk_buff_head *transmit, unsigned char hdr,
+ const unsigned char *data, int size)
{
- unsigned char newleds;
struct sk_buff *skb;
+ struct sock *sk = sock->sk;
- BT_DBG("session %p type %d code %d value %d", session, type, code, value);
-
- if (type != EV_LED)
- return -1;
-
- newleds = (!!test_bit(LED_KANA, dev->led) << 3) |
- (!!test_bit(LED_COMPOSE, dev->led) << 3) |
- (!!test_bit(LED_SCROLLL, dev->led) << 2) |
- (!!test_bit(LED_CAPSL, dev->led) << 1) |
- (!!test_bit(LED_NUML, dev->led));
-
- if (session->leds == newleds)
- return 0;
+ BT_DBG("session %p data %p size %d", session, data, size);
- session->leds = newleds;
+ if (atomic_read(&session->terminate))
+ return -EIO;
- skb = alloc_skb(3, GFP_ATOMIC);
+ skb = alloc_skb(size + 1, GFP_ATOMIC);
if (!skb) {
BT_ERR("Can't allocate memory for new frame");
return -ENOMEM;
}
- *skb_put(skb, 1) = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
- *skb_put(skb, 1) = 0x01;
- *skb_put(skb, 1) = newleds;
-
- skb_queue_tail(&session->intr_transmit, skb);
+ *skb_put(skb, 1) = hdr;
+ if (data && size > 0)
+ memcpy(skb_put(skb, size), data, size);
- hidp_schedule(session);
+ skb_queue_tail(transmit, skb);
+ wake_up_interruptible(sk_sleep(sk));
return 0;
}
-static int hidp_hidinput_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
+static int hidp_send_ctrl_message(struct hidp_session *session,
+ unsigned char hdr, const unsigned char *data,
+ int size)
{
- struct hid_device *hid = input_get_drvdata(dev);
- struct hidp_session *session = hid->driver_data;
+ return hidp_send_message(session, session->ctrl_sock,
+ &session->ctrl_transmit, hdr, data, size);
+}
- return hidp_queue_event(session, dev, type, code, value);
+static int hidp_send_intr_message(struct hidp_session *session,
+ unsigned char hdr, const unsigned char *data,
+ int size)
+{
+ return hidp_send_message(session, session->intr_sock,
+ &session->intr_transmit, hdr, data, size);
}
-static int hidp_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
+static int hidp_input_event(struct input_dev *dev, unsigned int type,
+ unsigned int code, int value)
{
struct hidp_session *session = input_get_drvdata(dev);
+ unsigned char newleds;
+ unsigned char hdr, data[2];
+
+ BT_DBG("session %p type %d code %d value %d",
+ session, type, code, value);
+
+ if (type != EV_LED)
+ return -1;
- return hidp_queue_event(session, dev, type, code, value);
+ newleds = (!!test_bit(LED_KANA, dev->led) << 3) |
+ (!!test_bit(LED_COMPOSE, dev->led) << 3) |
+ (!!test_bit(LED_SCROLLL, dev->led) << 2) |
+ (!!test_bit(LED_CAPSL, dev->led) << 1) |
+ (!!test_bit(LED_NUML, dev->led));
+
+ if (session->leds == newleds)
+ return 0;
+
+ session->leds = newleds;
+
+ hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
+ data[0] = 0x01;
+ data[1] = newleds;
+
+ return hidp_send_intr_message(session, hdr, data, 2);
}
static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
@@ -224,71 +229,9 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
input_sync(dev);
}
-static int __hidp_send_ctrl_message(struct hidp_session *session,
- unsigned char hdr, unsigned char *data,
- int size)
-{
- struct sk_buff *skb;
-
- BT_DBG("session %p data %p size %d", session, data, size);
-
- if (atomic_read(&session->terminate))
- return -EIO;
-
- skb = alloc_skb(size + 1, GFP_ATOMIC);
- if (!skb) {
- BT_ERR("Can't allocate memory for new frame");
- return -ENOMEM;
- }
-
- *skb_put(skb, 1) = hdr;
- if (data && size > 0)
- memcpy(skb_put(skb, size), data, size);
-
- skb_queue_tail(&session->ctrl_transmit, skb);
-
- return 0;
-}
-
-static int hidp_send_ctrl_message(struct hidp_session *session,
- unsigned char hdr, unsigned char *data, int size)
-{
- int err;
-
- err = __hidp_send_ctrl_message(session, hdr, data, size);
-
- hidp_schedule(session);
-
- return err;
-}
-
-static int hidp_queue_report(struct hidp_session *session,
- unsigned char *data, int size)
-{
- struct sk_buff *skb;
-
- BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size);
-
- skb = alloc_skb(size + 1, GFP_ATOMIC);
- if (!skb) {
- BT_ERR("Can't allocate memory for new frame");
- return -ENOMEM;
- }
-
- *skb_put(skb, 1) = 0xa2;
- if (size > 0)
- memcpy(skb_put(skb, size), data, size);
-
- skb_queue_tail(&session->intr_transmit, skb);
-
- hidp_schedule(session);
-
- return 0;
-}
-
static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
{
- unsigned char buf[32];
+ unsigned char buf[32], hdr;
int rsize;
rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
@@ -296,8 +239,9 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep
return -EIO;
hid_output_report(report, buf);
+ hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
- return hidp_queue_report(session, buf, rsize);
+ return hidp_send_intr_message(session, hdr, buf, rsize);
}
static int hidp_get_raw_report(struct hid_device *hid,
@@ -311,6 +255,9 @@ static int hidp_get_raw_report(struct hid_device *hid,
int numbered_reports = hid->report_enum[report_type].numbered;
int ret;
+ if (atomic_read(&session->terminate))
+ return -EIO;
+
switch (report_type) {
case HID_FEATURE_REPORT:
report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE;
@@ -333,17 +280,19 @@ static int hidp_get_raw_report(struct hid_device *hid,
session->waiting_report_number = numbered_reports ? report_number : -1;
set_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
data[0] = report_number;
- ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, 1);
+ ret = hidp_send_ctrl_message(session, report_type, data, 1);
if (ret)
goto err;
/* Wait for the return of the report. The returned report
gets put in session->report_return. */
- while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) {
+ while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) &&
+ !atomic_read(&session->terminate)) {
int res;
res = wait_event_interruptible_timeout(session->report_queue,
- !test_bit(HIDP_WAITING_FOR_RETURN, &session->flags),
+ !test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)
+ || atomic_read(&session->terminate),
5*HZ);
if (res == 0) {
/* timeout */
@@ -386,14 +335,11 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s
struct hidp_session *session = hid->driver_data;
int ret;
- switch (report_type) {
- case HID_FEATURE_REPORT:
- report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
- break;
- case HID_OUTPUT_REPORT:
- report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT;
- break;
- default:
+ if (report_type == HID_OUTPUT_REPORT) {
+ report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
+ return hidp_send_intr_message(session, report_type,
+ data, count);
+ } else if (report_type != HID_FEATURE_REPORT) {
return -EINVAL;
}
@@ -402,17 +348,19 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s
/* Set up our wait, and send the report request to the device. */
set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
- ret = hidp_send_ctrl_message(hid->driver_data, report_type, data,
- count);
+ report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
+ ret = hidp_send_ctrl_message(session, report_type, data, count);
if (ret)
goto err;
/* Wait for the ACK from the device. */
- while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) {
+ while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags) &&
+ !atomic_read(&session->terminate)) {
int res;
res = wait_event_interruptible_timeout(session->report_queue,
- !test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags),
+ !test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)
+ || atomic_read(&session->terminate),
10*HZ);
if (res == 0) {
/* timeout */
@@ -443,8 +391,7 @@ static void hidp_idle_timeout(unsigned long arg)
{
struct hidp_session *session = (struct hidp_session *) arg;
- atomic_inc(&session->terminate);
- wake_up_process(session->task);
+ hidp_session_terminate(session);
}
static void hidp_set_timer(struct hidp_session *session)
@@ -487,12 +434,12 @@ static void hidp_process_handshake(struct hidp_session *session,
case HIDP_HSHK_ERR_FATAL:
/* Device requests a reboot, as this is the only way this error
* can be recovered. */
- __hidp_send_ctrl_message(session,
+ hidp_send_ctrl_message(session,
HIDP_TRANS_HID_CONTROL | HIDP_CTRL_SOFT_RESET, NULL, 0);
break;
default:
- __hidp_send_ctrl_message(session,
+ hidp_send_ctrl_message(session,
HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
break;
}
@@ -512,8 +459,7 @@ static void hidp_process_hid_control(struct hidp_session *session,
skb_queue_purge(&session->ctrl_transmit);
skb_queue_purge(&session->intr_transmit);
- atomic_inc(&session->terminate);
- wake_up_process(current);
+ hidp_session_terminate(session);
}
}
@@ -541,7 +487,7 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
break;
default:
- __hidp_send_ctrl_message(session,
+ hidp_send_ctrl_message(session,
HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
}
@@ -588,7 +534,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
break;
default:
- __hidp_send_ctrl_message(session,
+ hidp_send_ctrl_message(session,
HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_UNSUPPORTED_REQUEST, NULL, 0);
break;
}
@@ -639,32 +585,24 @@ static int hidp_send_frame(struct socket *sock, unsigned char *data, int len)
return kernel_sendmsg(sock, &msg, &iv, 1, len);
}
-static void hidp_process_intr_transmit(struct hidp_session *session)
+/* dequeue message from @transmit and send via @sock */
+static void hidp_process_transmit(struct hidp_session *session,
+ struct sk_buff_head *transmit,
+ struct socket *sock)
{
struct sk_buff *skb;
+ int ret;
BT_DBG("session %p", session);
- while ((skb = skb_dequeue(&session->intr_transmit))) {
- if (hidp_send_frame(session->intr_sock, skb->data, skb->len) < 0) {
- skb_queue_head(&session->intr_transmit, skb);
+ while ((skb = skb_dequeue(transmit))) {
+ ret = hidp_send_frame(sock, skb->data, skb->len);
+ if (ret == -EAGAIN) {
+ skb_queue_head(transmit, skb);
break;
- }
-
- hidp_set_timer(session);
- kfree_skb(skb);
- }
-}
-
-static void hidp_process_ctrl_transmit(struct hidp_session *session)
-{
- struct sk_buff *skb;
-
- BT_DBG("session %p", session);
-
- while ((skb = skb_dequeue(&session->ctrl_transmit))) {
- if (hidp_send_frame(session->ctrl_sock, skb->data, skb->len) < 0) {
- skb_queue_head(&session->ctrl_transmit, skb);
+ } else if (ret < 0) {
+ hidp_session_terminate(session);
+ kfree_skb(skb);
break;
}
@@ -673,121 +611,6 @@ static void hidp_process_ctrl_transmit(struct hidp_session *session)
}
}
-static int hidp_session(void *arg)
-{
- struct hidp_session *session = arg;
- struct sock *ctrl_sk = session->ctrl_sock->sk;
- struct sock *intr_sk = session->intr_sock->sk;
- struct sk_buff *skb;
- wait_queue_t ctrl_wait, intr_wait;
-
- BT_DBG("session %p", session);
-
- __module_get(THIS_MODULE);
- set_user_nice(current, -15);
-
- init_waitqueue_entry(&ctrl_wait, current);
- init_waitqueue_entry(&intr_wait, current);
- add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
- add_wait_queue(sk_sleep(intr_sk), &intr_wait);
- session->waiting_for_startup = 0;
- wake_up_interruptible(&session->startup_queue);
- set_current_state(TASK_INTERRUPTIBLE);
- while (!atomic_read(&session->terminate)) {
- if (ctrl_sk->sk_state != BT_CONNECTED ||
- intr_sk->sk_state != BT_CONNECTED)
- break;
-
- while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) {
- skb_orphan(skb);
- if (!skb_linearize(skb))
- hidp_recv_intr_frame(session, skb);
- else
- kfree_skb(skb);
- }
-
- hidp_process_intr_transmit(session);
-
- while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) {
- skb_orphan(skb);
- if (!skb_linearize(skb))
- hidp_recv_ctrl_frame(session, skb);
- else
- kfree_skb(skb);
- }
-
- hidp_process_ctrl_transmit(session);
-
- schedule();
- set_current_state(TASK_INTERRUPTIBLE);
- }
- set_current_state(TASK_RUNNING);
- remove_wait_queue(sk_sleep(intr_sk), &intr_wait);
- remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
-
- clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
- clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
- wake_up_interruptible(&session->report_queue);
-
- down_write(&hidp_session_sem);
-
- hidp_del_timer(session);
-
- if (session->input) {
- input_unregister_device(session->input);
- session->input = NULL;
- }
-
- if (session->hid) {
- hid_destroy_device(session->hid);
- session->hid = NULL;
- }
-
- /* Wakeup user-space polling for socket errors */
- session->intr_sock->sk->sk_err = EUNATCH;
- session->ctrl_sock->sk->sk_err = EUNATCH;
-
- hidp_schedule(session);
-
- fput(session->intr_sock->file);
-
- wait_event_timeout(*(sk_sleep(ctrl_sk)),
- (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500));
-
- fput(session->ctrl_sock->file);
-
- __hidp_unlink_session(session);
-
- up_write(&hidp_session_sem);
-
- kfree(session->rd_data);
- kfree(session);
- module_put_and_exit(0);
- return 0;
-}
-
-static struct hci_conn *hidp_get_connection(struct hidp_session *session)
-{
- bdaddr_t *src = &bt_sk(session->ctrl_sock->sk)->src;
- bdaddr_t *dst = &bt_sk(session->ctrl_sock->sk)->dst;
- struct hci_conn *conn;
- struct hci_dev *hdev;
-
- hdev = hci_get_route(dst, src);
- if (!hdev)
- return NULL;
-
- hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
- if (conn)
- hci_conn_hold_device(conn);
- hci_dev_unlock(hdev);
-
- hci_dev_put(hdev);
-
- return conn;
-}
-
static int hidp_setup_input(struct hidp_session *session,
struct hidp_connadd_req *req)
{
@@ -835,7 +658,7 @@ static int hidp_setup_input(struct hidp_session *session,
input->relbit[0] |= BIT_MASK(REL_WHEEL);
}
- input->dev.parent = &session->conn->dev;
+ input->dev.parent = &session->conn->hcon->dev;
input->event = hidp_input_event;
@@ -894,7 +717,6 @@ static struct hid_ll_driver hidp_hid_driver = {
.stop = hidp_stop,
.open = hidp_open,
.close = hidp_close,
- .hidinput_input_event = hidp_hidinput_event,
};
/* This function sets up the hid device. It does not add it
@@ -939,7 +761,7 @@ static int hidp_setup_hid(struct hidp_session *session,
snprintf(hid->uniq, sizeof(hid->uniq), "%pMR",
&bt_sk(session->ctrl_sock->sk)->dst);
- hid->dev.parent = &session->conn->dev;
+ hid->dev.parent = &session->conn->hcon->dev;
hid->ll_driver = &hidp_hid_driver;
hid->hid_get_raw_report = hidp_get_raw_report;
@@ -961,80 +783,241 @@ fault:
return err;
}
-int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
+/* initialize session devices */
+static int hidp_session_dev_init(struct hidp_session *session,
+ struct hidp_connadd_req *req)
{
- struct hidp_session *session, *s;
- int vendor, product;
- int err;
+ int ret;
- BT_DBG("");
+ if (req->rd_size > 0) {
+ ret = hidp_setup_hid(session, req);
+ if (ret && ret != -ENODEV)
+ return ret;
+ }
- if (bacmp(&bt_sk(ctrl_sock->sk)->src, &bt_sk(intr_sock->sk)->src) ||
- bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst))
- return -ENOTUNIQ;
+ if (!session->hid) {
+ ret = hidp_setup_input(session, req);
+ if (ret < 0)
+ return ret;
+ }
- BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size);
+ return 0;
+}
- down_write(&hidp_session_sem);
+/* destroy session devices */
+static void hidp_session_dev_destroy(struct hidp_session *session)
+{
+ if (session->hid)
+ put_device(&session->hid->dev);
+ else if (session->input)
+ input_put_device(session->input);
- s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst);
- if (s && s->state == BT_CONNECTED) {
- up_write(&hidp_session_sem);
- return -EEXIST;
- }
+ kfree(session->rd_data);
+ session->rd_data = NULL;
+}
- session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL);
- if (!session) {
- up_write(&hidp_session_sem);
- return -ENOMEM;
+/* add HID/input devices to their underlying bus systems */
+static int hidp_session_dev_add(struct hidp_session *session)
+{
+ int ret;
+
+ /* Both HID and input systems drop a ref-count when unregistering the
+ * device but they don't take a ref-count when registering them. Work
+ * around this by explicitly taking a refcount during registration
+ * which is dropped automatically by unregistering the devices. */
+
+ if (session->hid) {
+ ret = hid_add_device(session->hid);
+ if (ret)
+ return ret;
+ get_device(&session->hid->dev);
+ } else if (session->input) {
+ ret = input_register_device(session->input);
+ if (ret)
+ return ret;
+ input_get_device(session->input);
}
- bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst);
+ return 0;
+}
- session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl_sock->sk)->chan->omtu,
- l2cap_pi(ctrl_sock->sk)->chan->imtu);
- session->intr_mtu = min_t(uint, l2cap_pi(intr_sock->sk)->chan->omtu,
- l2cap_pi(intr_sock->sk)->chan->imtu);
+/* remove HID/input devices from their bus systems */
+static void hidp_session_dev_del(struct hidp_session *session)
+{
+ if (session->hid)
+ hid_destroy_device(session->hid);
+ else if (session->input)
+ input_unregister_device(session->input);
+}
- BT_DBG("ctrl mtu %d intr mtu %d", session->ctrl_mtu, session->intr_mtu);
+/*
+ * Asynchronous device registration
+ * HID device drivers might want to perform I/O during initialization to
+ * detect device types. Therefore, call device registration in a separate
+ * worker so the HIDP thread can schedule I/O operations.
+ * Note that this must be called after the worker thread was initialized
+ * successfully. This will then add the devices and increase session state
+ * on success, otherwise it will terminate the session thread.
+ */
+static void hidp_session_dev_work(struct work_struct *work)
+{
+ struct hidp_session *session = container_of(work,
+ struct hidp_session,
+ dev_init);
+ int ret;
- session->ctrl_sock = ctrl_sock;
- session->intr_sock = intr_sock;
- session->state = BT_CONNECTED;
+ ret = hidp_session_dev_add(session);
+ if (!ret)
+ atomic_inc(&session->state);
+ else
+ hidp_session_terminate(session);
+}
- session->conn = hidp_get_connection(session);
- if (!session->conn) {
- err = -ENOTCONN;
- goto failed;
- }
+/*
+ * Create new session object
+ * Allocate session object, initialize static fields, copy input data into the
+ * object and take a reference to all sub-objects.
+ * This returns 0 on success and puts a pointer to the new session object in
+ * \out. Otherwise, an error code is returned.
+ * The new session object has an initial ref-count of 1.
+ */
+static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
+ struct socket *ctrl_sock,
+ struct socket *intr_sock,
+ struct hidp_connadd_req *req,
+ struct l2cap_conn *conn)
+{
+ struct hidp_session *session;
+ int ret;
+ struct bt_sock *ctrl, *intr;
- setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session);
+ ctrl = bt_sk(ctrl_sock->sk);
+ intr = bt_sk(intr_sock->sk);
+ session = kzalloc(sizeof(*session), GFP_KERNEL);
+ if (!session)
+ return -ENOMEM;
+
+ /* object and runtime management */
+ kref_init(&session->ref);
+ atomic_set(&session->state, HIDP_SESSION_IDLING);
+ init_waitqueue_head(&session->state_queue);
+ session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
+
+ /* connection management */
+ bacpy(&session->bdaddr, bdaddr);
+ session->conn = conn;
+ session->user.probe = hidp_session_probe;
+ session->user.remove = hidp_session_remove;
+ session->ctrl_sock = ctrl_sock;
+ session->intr_sock = intr_sock;
skb_queue_head_init(&session->ctrl_transmit);
skb_queue_head_init(&session->intr_transmit);
+ session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl)->chan->omtu,
+ l2cap_pi(ctrl)->chan->imtu);
+ session->intr_mtu = min_t(uint, l2cap_pi(intr)->chan->omtu,
+ l2cap_pi(intr)->chan->imtu);
+ session->idle_to = req->idle_to;
+
+ /* device management */
+ INIT_WORK(&session->dev_init, hidp_session_dev_work);
+ setup_timer(&session->timer, hidp_idle_timeout,
+ (unsigned long)session);
+ /* session data */
mutex_init(&session->report_mutex);
init_waitqueue_head(&session->report_queue);
- init_waitqueue_head(&session->startup_queue);
- session->waiting_for_startup = 1;
- session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
- session->idle_to = req->idle_to;
- __hidp_link_session(session);
+ ret = hidp_session_dev_init(session, req);
+ if (ret)
+ goto err_free;
- if (req->rd_size > 0) {
- err = hidp_setup_hid(session, req);
- if (err && err != -ENODEV)
- goto purge;
- }
+ l2cap_conn_get(session->conn);
+ get_file(session->intr_sock->file);
+ get_file(session->ctrl_sock->file);
+ *out = session;
+ return 0;
- if (!session->hid) {
- err = hidp_setup_input(session, req);
- if (err < 0)
- goto purge;
+err_free:
+ kfree(session);
+ return ret;
+}
+
+/* increase ref-count of the given session by one */
+static void hidp_session_get(struct hidp_session *session)
+{
+ kref_get(&session->ref);
+}
+
+/* release callback */
+static void session_free(struct kref *ref)
+{
+ struct hidp_session *session = container_of(ref, struct hidp_session,
+ ref);
+
+ hidp_session_dev_destroy(session);
+ skb_queue_purge(&session->ctrl_transmit);
+ skb_queue_purge(&session->intr_transmit);
+ fput(session->intr_sock->file);
+ fput(session->ctrl_sock->file);
+ l2cap_conn_put(session->conn);
+ kfree(session);
+}
+
+/* decrease ref-count of the given session by one */
+static void hidp_session_put(struct hidp_session *session)
+{
+ kref_put(&session->ref, session_free);
+}
+
+/*
+ * Search the list of active sessions for a session with target address
+ * \bdaddr. You must hold at least a read-lock on \hidp_session_sem. As long as
+ * you do not release this lock, the session objects cannot vanish and you can
+ * safely take a reference to the session yourself.
+ */
+static struct hidp_session *__hidp_session_find(const bdaddr_t *bdaddr)
+{
+ struct hidp_session *session;
+
+ list_for_each_entry(session, &hidp_session_list, list) {
+ if (!bacmp(bdaddr, &session->bdaddr))
+ return session;
}
- hidp_set_timer(session);
+ return NULL;
+}
+
+/*
+ * Same as __hidp_session_find() but no locks must be held. This also takes a
+ * reference of the returned session (if non-NULL) so you must drop this
+ * reference if you no longer use the object.
+ */
+static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr)
+{
+ struct hidp_session *session;
+
+ down_read(&hidp_session_sem);
+
+ session = __hidp_session_find(bdaddr);
+ if (session)
+ hidp_session_get(session);
+
+ up_read(&hidp_session_sem);
+
+ return session;
+}
+
+/*
+ * Start session synchronously
+ * This starts a session thread and waits until initialization
+ * is done or returns an error if it couldn't be started.
+ * If this returns 0 the session thread is up and running. You must call
+ * hidp_session_stop_sync() before deleting any runtime resources.
+ */
+static int hidp_session_start_sync(struct hidp_session *session)
+{
+ unsigned int vendor, product;
if (session->hid) {
vendor = session->hid->vendor;
@@ -1047,98 +1030,334 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
product = 0x0000;
}
- session->task = kthread_run(hidp_session, session, "khidpd_%04x%04x",
- vendor, product);
- if (IS_ERR(session->task)) {
- err = PTR_ERR(session->task);
- goto unlink;
+ session->task = kthread_run(hidp_session_thread, session,
+ "khidpd_%04x%04x", vendor, product);
+ if (IS_ERR(session->task))
+ return PTR_ERR(session->task);
+
+ while (atomic_read(&session->state) <= HIDP_SESSION_IDLING)
+ wait_event(session->state_queue,
+ atomic_read(&session->state) > HIDP_SESSION_IDLING);
+
+ return 0;
+}
+
+/*
+ * Terminate session thread
+ * Wake up session thread and notify it to stop. This is asynchronous and
+ * returns immediately. Call this whenever a runtime error occurs and you want
+ * the session to stop.
+ * Note: wake_up_process() performs any necessary memory-barriers for us.
+ */
+static void hidp_session_terminate(struct hidp_session *session)
+{
+ atomic_inc(&session->terminate);
+ wake_up_process(session->task);
+}
+
+/*
+ * Probe HIDP session
+ * This is called from the l2cap_conn core when our l2cap_user object is bound
+ * to the hci-connection. We get the session via the \user object and can now
+ * start the session thread, link it into the global session list and
+ * schedule HID/input device registration.
+ * The global session-list owns its own reference to the session object so you
+ * can drop your own reference after registering the l2cap_user object.
+ */
+static int hidp_session_probe(struct l2cap_conn *conn,
+ struct l2cap_user *user)
+{
+ struct hidp_session *session = container_of(user,
+ struct hidp_session,
+ user);
+ struct hidp_session *s;
+ int ret;
+
+ down_write(&hidp_session_sem);
+
+ /* check that no other session for this device exists */
+ s = __hidp_session_find(&session->bdaddr);
+ if (s) {
+ ret = -EEXIST;
+ goto out_unlock;
}
- while (session->waiting_for_startup) {
- wait_event_interruptible(session->startup_queue,
- !session->waiting_for_startup);
+ if (session->input) {
+ ret = hidp_session_dev_add(session);
+ if (ret)
+ goto out_unlock;
}
- if (session->hid)
- err = hid_add_device(session->hid);
+ ret = hidp_session_start_sync(session);
+ if (ret)
+ goto out_del;
+
+ /* HID device registration is async to allow I/O during probe */
+ if (session->input)
+ atomic_inc(&session->state);
else
- err = input_register_device(session->input);
+ schedule_work(&session->dev_init);
- if (err < 0) {
- atomic_inc(&session->terminate);
- wake_up_process(session->task);
- up_write(&hidp_session_sem);
- return err;
- }
+ hidp_session_get(session);
+ list_add(&session->list, &hidp_session_list);
+ ret = 0;
+ goto out_unlock;
- if (session->input) {
- hidp_send_ctrl_message(session,
- HIDP_TRANS_SET_PROTOCOL | HIDP_PROTO_BOOT, NULL, 0);
- session->flags |= (1 << HIDP_BOOT_PROTOCOL_MODE);
+out_del:
+ if (session->input)
+ hidp_session_dev_del(session);
+out_unlock:
+ up_write(&hidp_session_sem);
+ return ret;
+}
- session->leds = 0xff;
- hidp_input_event(session->input, EV_LED, 0, 0);
- }
+/*
+ * Remove HIDP session
+ * Called from the l2cap_conn core when either we explicitly unregistered
+ * the l2cap_user object or if the underlying connection is shut down.
+ * We signal the hidp-session thread to shut down, unregister the HID/input
+ * devices and unlink the session from the global list.
+ * This drops the reference to the session that is owned by the global
+ * session-list.
+ * Note: We _must_ not synchronously wait for the session-thread to shut down.
+ * This is because the session-thread might be waiting for an HCI lock that is
+ * held while we are called. Therefore, we only unregister the devices and
+ * notify the session-thread to terminate. The thread itself owns a reference
+ * to the session object so it can safely shut down.
+ */
+static void hidp_session_remove(struct l2cap_conn *conn,
+ struct l2cap_user *user)
+{
+ struct hidp_session *session = container_of(user,
+ struct hidp_session,
+ user);
+
+ down_write(&hidp_session_sem);
+
+ /* asynchronous: only flags the thread and wakes it, never waits for it */
+ hidp_session_terminate(session);
+
+ /* make sure the deferred device setup is neither pending nor running */
+ cancel_work_sync(&session->dev_init);
+ /*
+ * Input devices are added synchronously in hidp_session_probe(); in the
+ * other case only tear down once the state moved past PREPARING.
+ * NOTE(review): presumably the dev_init worker advances the state after
+ * it registered the device -- the worker is not visible here, confirm.
+ */
+ if (session->input ||
+ atomic_read(&session->state) > HIDP_SESSION_PREPARING)
+ hidp_session_dev_del(session);
+
+ list_del(&session->list);
 up_write(&hidp_session_sem);
- return 0;
-unlink:
+ /* drop the reference that was taken for the global session list */
+ hidp_session_put(session);
+}
+
+/*
+ * Session Worker
+ * This performs the actual main-loop of the HIDP worker. We first check
+ * whether the underlying connection is still alive, then parse all pending
+ * messages and finally send all outstanding messages.
+ */
+static void hidp_session_run(struct hidp_session *session)
+{
+ struct sock *ctrl_sk = session->ctrl_sock->sk;
+ struct sock *intr_sk = session->intr_sock->sk;
+ struct sk_buff *skb;
+
+ for (;;) {
+ /*
+ * This thread can be woken up two ways:
+ * - You call hidp_session_terminate() which sets the
+ * session->terminate flag and wakes this thread up.
+ * - Via modifying the socket state of ctrl/intr_sock. This
+ * thread is woken up by ->sk_state_changed().
+ *
+ * Note: set_current_state() performs any necessary
+ * memory-barriers for us.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (atomic_read(&session->terminate))
+ break;
+
+ if (ctrl_sk->sk_state != BT_CONNECTED ||
+ intr_sk->sk_state != BT_CONNECTED)
+ break;
+
+ /* parse incoming intr-skbs */
+ while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) {
+ skb_orphan(skb);
+ if (!skb_linearize(skb))
+ hidp_recv_intr_frame(session, skb);
+ else
+ kfree_skb(skb);
+ }
+
+ /* send pending intr-skbs */
+ hidp_process_transmit(session, &session->intr_transmit,
+ session->intr_sock);
+
+ /* parse incoming ctrl-skbs */
+ while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) {
+ skb_orphan(skb);
+ if (!skb_linearize(skb))
+ hidp_recv_ctrl_frame(session, skb);
+ else
+ kfree_skb(skb);
+ }
+
+ /* send pending ctrl-skbs */
+ hidp_process_transmit(session, &session->ctrl_transmit,
+ session->ctrl_sock);
+
+ schedule();
+ }
+
+ atomic_inc(&session->terminate);
+ set_current_state(TASK_RUNNING);
+}
+
+/*
+ * HIDP session thread
+ * This thread runs the I/O for a single HIDP session. Startup is synchronous
+ * which allows us to take references to ourself here instead of doing that in
+ * the caller.
+ * When we are ready to run we notify the caller and call hidp_session_run().
+ * \arg is the struct hidp_session this thread serves. The thread holds its own
+ * session and module reference for as long as it runs and drops both on exit.
+ */
+static int hidp_session_thread(void *arg)
+{
+ struct hidp_session *session = arg;
+ wait_queue_t ctrl_wait, intr_wait;
+
+ BT_DBG("session %p", session);
+
+ /* initialize runtime environment */
+ hidp_session_get(session);
+ __module_get(THIS_MODULE);
+ set_user_nice(current, -15);
+ hidp_set_timer(session);
+
+ init_waitqueue_entry(&ctrl_wait, current);
+ init_waitqueue_entry(&intr_wait, current);
+ add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
+ add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
+ /* This memory barrier is paired with wq_has_sleeper(). See
+ * sock_poll_wait() for more information why this is needed. */
+ smp_mb();
+
+ /* notify synchronous startup that we're ready */
+ atomic_inc(&session->state);
+ wake_up(&session->state_queue);
+
+ /* run session */
+ hidp_session_run(session);
+
+ /*
+ * cleanup runtime environment
+ * Each on-stack wait entry must be removed from the very queue it was
+ * added to above (ctrl_wait <-> ctrl_sock, intr_wait <-> intr_sock).
+ */
+ remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
+ remove_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
+ wake_up_interruptible(&session->report_queue);
 hidp_del_timer(session);
- if (session->input) {
- input_unregister_device(session->input);
- session->input = NULL;
+ /*
+ * If we stopped ourself due to any internal signal, we should try to
+ * unregister our own session here to avoid having it linger until the
+ * parent l2cap_conn dies or user-space cleans it up.
+ * This does not deadlock as we don't do any synchronous shutdown.
+ * Instead, this call has the same semantics as if user-space tried to
+ * delete the session.
+ */
+ l2cap_unregister_user(session->conn, &session->user);
+ hidp_session_put(session);
+
+ module_put_and_exit(0);
+ return 0;
+}
+
+/*
+ * Sanity-check the control and interrupt sockets before a session is built on
+ * top of them.
+ * Returns 0 if both are L2CAP sockets with identical source and destination
+ * addresses, both are in BT_CONNECTED state and no session for the destination
+ * address is listed yet. Otherwise returns -EINVAL (not an L2CAP socket),
+ * -ENOTUNIQ (addresses differ), -EBADFD (not connected) or -EEXIST (a session
+ * for this destination already exists).
+ */
+static int hidp_verify_sockets(struct socket *ctrl_sock,
+ struct socket *intr_sock)
+{
+ struct bt_sock *ctrl, *intr;
+ struct hidp_session *session;
+
+ if (!l2cap_is_socket(ctrl_sock) || !l2cap_is_socket(intr_sock))
+ return -EINVAL;
+
+ ctrl = bt_sk(ctrl_sock->sk);
+ intr = bt_sk(intr_sock->sk);
+
+ if (bacmp(&ctrl->src, &intr->src) || bacmp(&ctrl->dst, &intr->dst))
+ return -ENOTUNIQ;
+ if (ctrl->sk.sk_state != BT_CONNECTED ||
+ intr->sk.sk_state != BT_CONNECTED)
+ return -EBADFD;
+
+ /* early session check, we check again during session registration */
+ session = hidp_session_find(&ctrl->dst);
+ if (session) {
+ /* hidp_session_find() took a reference for us; we only needed the answer */
+ hidp_session_put(session);
+ return -EEXIST;
 }
- if (session->hid) {
- hid_destroy_device(session->hid);
- session->hid = NULL;
+ return 0;
+}
+
+/*
+ * Create a HIDP session on top of the two given sockets and register it as
+ * l2cap_user on the underlying l2cap_conn.
+ * Returns 0 on success or a negative error code: whatever
+ * hidp_verify_sockets(), hidp_session_new() or l2cap_register_user() report,
+ * or -EBADFD if the control channel has no connection any more.
+ * The local session and connection references are dropped on every path; on
+ * success the session stays alive through the reference taken in
+ * hidp_session_probe() for the global session list.
+ */
+int hidp_connection_add(struct hidp_connadd_req *req,
+ struct socket *ctrl_sock,
+ struct socket *intr_sock)
+{
+ struct hidp_session *session;
+ struct l2cap_conn *conn;
+ struct l2cap_chan *chan;
+ int ret;
+
+ ret = hidp_verify_sockets(ctrl_sock, intr_sock);
+ if (ret)
+ return ret;
+
+ /*
+ * Only now is ctrl_sock known to be an L2CAP socket, so l2cap_pi() must
+ * not be applied to it any earlier.
+ */
+ chan = l2cap_pi(ctrl_sock->sk)->chan;
+ conn = NULL;
+ l2cap_chan_lock(chan);
+ if (chan->conn) {
+ l2cap_conn_get(chan->conn);
+ conn = chan->conn;
 }
+ l2cap_chan_unlock(chan);
- kfree(session->rd_data);
- session->rd_data = NULL;
+ if (!conn)
+ return -EBADFD;
-purge:
- __hidp_unlink_session(session);
+ ret = hidp_session_new(&session, &bt_sk(ctrl_sock->sk)->dst, ctrl_sock,
+ intr_sock, req, conn);
+ if (ret)
+ goto out_conn;
- skb_queue_purge(&session->ctrl_transmit);
- skb_queue_purge(&session->intr_transmit);
+ ret = l2cap_register_user(conn, &session->user);
+ if (ret)
+ goto out_session;
-failed:
- up_write(&hidp_session_sem);
+ ret = 0;
- kfree(session);
- return err;
+ /* success deliberately falls through: our local references are dropped */
+out_session:
+ hidp_session_put(session);
+out_conn:
+ l2cap_conn_put(conn);
+ return ret;
 }
-int hidp_del_connection(struct hidp_conndel_req *req)
+/*
+ * Delete the session whose target address is req->bdaddr.
+ * If HIDP_VIRTUAL_CABLE_UNPLUG is set in req->flags only a virtual-cable-unplug
+ * control message is queued; otherwise the session is unregistered from its
+ * l2cap_conn. Returns -ENOENT if no such session is listed, 0 otherwise.
+ */
+int hidp_connection_del(struct hidp_conndel_req *req)
 {
 struct hidp_session *session;
- int err = 0;
- BT_DBG("");
+ session = hidp_session_find(&req->bdaddr);
+ if (!session)
+ return -ENOENT;
- down_read(&hidp_session_sem);
+ if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG))
+ hidp_send_ctrl_message(session,
+ HIDP_TRANS_HID_CONTROL |
+ HIDP_CTRL_VIRTUAL_CABLE_UNPLUG,
+ NULL, 0);
+ else
+ l2cap_unregister_user(session->conn, &session->user);
- session = __hidp_get_session(&req->bdaddr);
- if (session) {
- if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG)) {
- hidp_send_ctrl_message(session,
- HIDP_TRANS_HID_CONTROL | HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0);
- } else {
- /* Flush the transmit queues */
- skb_queue_purge(&session->ctrl_transmit);
- skb_queue_purge(&session->intr_transmit);
-
- atomic_inc(&session->terminate);
- wake_up_process(session->task);
- }
- } else
- err = -ENOENT;
+ /* drop the reference taken by hidp_session_find() */
+ hidp_session_put(session);
- up_read(&hidp_session_sem);
- return err;
+ return 0;
 }
int hidp_get_connlist(struct hidp_connlist_req *req)
@@ -1153,7 +1372,7 @@ int hidp_get_connlist(struct hidp_connlist_req *req)
list_for_each_entry(session, &hidp_session_list, list) {
struct hidp_conninfo ci;
- __hidp_copy_session(session, &ci);
+ hidp_copy_session(session, &ci);
if (copy_to_user(req->ci, &ci, sizeof(ci))) {
err = -EFAULT;
@@ -1174,18 +1393,14 @@ int hidp_get_connlist(struct hidp_connlist_req *req)
int hidp_get_conninfo(struct hidp_conninfo *ci)
{
struct hidp_session *session;
- int err = 0;
-
- down_read(&hidp_session_sem);
- session = __hidp_get_session(&ci->bdaddr);
- if (session)
- __hidp_copy_session(session, ci);
- else
- err = -ENOENT;
+ session = hidp_session_find(&ci->bdaddr);
+ if (session) {
+ hidp_copy_session(session, ci);
+ hidp_session_put(session);
+ }
- up_read(&hidp_session_sem);
- return err;
+ return session ? 0 : -ENOENT;
}
static int __init hidp_init(void)
@@ -1204,6 +1419,7 @@ module_init(hidp_init);
module_exit(hidp_exit);
MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>");
MODULE_DESCRIPTION("Bluetooth HIDP ver " VERSION);
MODULE_VERSION(VERSION);
MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index af1bcc823f26..9e6cc3553105 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -24,7 +24,9 @@
#define __HIDP_H
#include <linux/types.h>
+#include <linux/kref.h>
#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/l2cap.h>
/* HIDP header masks */
#define HIDP_HEADER_TRANS_MASK 0xf0
@@ -119,43 +121,54 @@ struct hidp_connlist_req {
struct hidp_conninfo __user *ci;
};
-int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
-int hidp_del_connection(struct hidp_conndel_req *req);
+int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
+int hidp_connection_del(struct hidp_conndel_req *req);
int hidp_get_connlist(struct hidp_connlist_req *req);
int hidp_get_conninfo(struct hidp_conninfo *ci);
+/*
+ * Startup progress of a session, kept in hidp_session->state and only ever
+ * advanced with atomic_inc(). hidp_session_start_sync() waits until the value
+ * is above IDLING; the session thread performs that first increment once its
+ * runtime environment is set up. hidp_session_probe() increments again for
+ * input sessions after the thread was started.
+ * NOTE(review): for HID sessions the second step presumably happens in the
+ * dev_init worker, which is not visible here -- confirm.
+ */
+enum hidp_session_state {
+ HIDP_SESSION_IDLING,
+ HIDP_SESSION_PREPARING,
+ HIDP_SESSION_RUNNING,
+};
+
/* HIDP session defines */
struct hidp_session {
struct list_head list;
+ struct kref ref;
- struct hci_conn *conn;
+ /* runtime management */
+ atomic_t state;
+ wait_queue_head_t state_queue;
+ atomic_t terminate;
+ struct task_struct *task;
+ unsigned long flags;
+ /* connection management */
+ bdaddr_t bdaddr;
+ struct l2cap_conn *conn;
+ struct l2cap_user user;
struct socket *ctrl_sock;
struct socket *intr_sock;
-
- bdaddr_t bdaddr;
-
- unsigned long state;
- unsigned long flags;
- unsigned long idle_to;
-
+ struct sk_buff_head ctrl_transmit;
+ struct sk_buff_head intr_transmit;
uint ctrl_mtu;
uint intr_mtu;
+ unsigned long idle_to;
- atomic_t terminate;
- struct task_struct *task;
-
- unsigned char keys[8];
- unsigned char leds;
-
+ /* device management */
+ struct work_struct dev_init;
struct input_dev *input;
-
struct hid_device *hid;
-
struct timer_list timer;
- struct sk_buff_head ctrl_transmit;
- struct sk_buff_head intr_transmit;
+ /* Report descriptor */
+ __u8 *rd_data;
+ uint rd_size;
+
+ /* session data */
+ unsigned char keys[8];
+ unsigned char leds;
/* Used in hidp_get_raw_report() */
int waiting_report_type; /* HIDP_DATA_RTYPE_* */
@@ -166,24 +179,8 @@ struct hidp_session {
/* Used in hidp_output_raw_report() */
int output_report_success; /* boolean */
-
- /* Report descriptor */
- __u8 *rd_data;
- uint rd_size;
-
- wait_queue_head_t startup_queue;
- int waiting_for_startup;
};
-static inline void hidp_schedule(struct hidp_session *session)
-{
- struct sock *ctrl_sk = session->ctrl_sock->sk;
- struct sock *intr_sk = session->intr_sock->sk;
-
- wake_up_interruptible(sk_sleep(ctrl_sk));
- wake_up_interruptible(sk_sleep(intr_sk));
-}
-
/* HIDP init defines */
extern int __init hidp_init_sockets(void);
extern void __exit hidp_cleanup_sockets(void);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 82a829d90b0f..cb3fdde1968a 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -77,21 +77,12 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
return err;
}
- if (csock->sk->sk_state != BT_CONNECTED ||
- isock->sk->sk_state != BT_CONNECTED) {
- sockfd_put(csock);
- sockfd_put(isock);
- return -EBADFD;
- }
+ err = hidp_connection_add(&ca, csock, isock);
+ if (!err && copy_to_user(argp, &ca, sizeof(ca)))
+ err = -EFAULT;
- err = hidp_add_connection(&ca, csock, isock);
- if (!err) {
- if (copy_to_user(argp, &ca, sizeof(ca)))
- err = -EFAULT;
- } else {
- sockfd_put(csock);
- sockfd_put(isock);
- }
+ sockfd_put(csock);
+ sockfd_put(isock);
return err;
@@ -102,7 +93,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
if (copy_from_user(&cd, argp, sizeof(cd)))
return -EFAULT;
- return hidp_del_connection(&cd);
+ return hidp_connection_del(&cd);
case HIDPGETCONNLIST:
if (copy_from_user(&cl, argp, sizeof(cl)))
@@ -284,7 +275,7 @@ int __init hidp_init_sockets(void)
goto error;
}
- err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL);
+ err = bt_procfs_init(&init_net, "hidp", &hidp_sk_list, NULL);
if (err < 0) {
BT_ERR("Failed to create HIDP proc file");
bt_sock_unregister(BTPROTO_HIDP);
@@ -296,7 +287,6 @@ int __init hidp_init_sockets(void)
return 0;
error:
- BT_ERR("Can't register HIDP socket");
proto_unregister(&hidp_proto);
return err;
}
@@ -304,8 +294,6 @@ error:
void __exit hidp_cleanup_sockets(void)
{
bt_procfs_cleanup(&init_net, "hidp");
- if (bt_sock_unregister(BTPROTO_HIDP) < 0)
- BT_ERR("Can't unregister HIDP socket");
-
+ bt_sock_unregister(BTPROTO_HIDP);
proto_unregister(&hidp_proto);
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7c7e9321f1ea..68843a28a7af 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -571,7 +571,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
chan->conn = NULL;
if (chan->chan_type != L2CAP_CHAN_CONN_FIX_A2MP)
- hci_conn_put(conn->hcon);
+ hci_conn_drop(conn->hcon);
if (mgr && mgr->bredr_chan == chan)
mgr->bredr_chan = NULL;
@@ -1446,6 +1446,89 @@ static void l2cap_info_timeout(struct work_struct *work)
l2cap_conn_start(conn);
}
+/*
+ * l2cap_user
+ * External modules can register l2cap_user objects on l2cap_conn. The ->probe
+ * callback is called during registration. The ->remove callback is called
+ * during unregistration.
+ * An l2cap_user object can either be explicitly unregistered or when the
+ * underlying l2cap_conn object is deleted. This guarantees that l2cap->hcon,
+ * l2cap->hchan, .. are valid as long as the remove callback hasn't been called.
+ * External modules must own a reference to the l2cap_conn object if they intend
+ * to call l2cap_unregister_user(). The l2cap_conn object might get destroyed at
+ * any time if they don't.
+ */
+
+/*
+ * Register \user on \conn and call its ->probe callback.
+ * Returns 0 on success, -EINVAL if \user is already linked into a list,
+ * -ENODEV if \conn was already torn down, or the error returned by ->probe.
+ * The user is only added to conn->users if ->probe succeeded.
+ */
+int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user)
+{
+ struct hci_dev *hdev = conn->hcon->hdev;
+ int ret;
+
+ /* We need to check whether l2cap_conn is registered. If it is not, we
+ * must not register the l2cap_user. l2cap_conn_del() unregisters
+ * l2cap_conn objects, but doesn't provide its own locking. Instead, it
+ * relies on the parent hci_conn object to be locked. This itself relies
+ * on the hci_dev object to be locked. So we must lock the hci device
+ * here, too. */
+
+ hci_dev_lock(hdev);
+
+ /* unregistration resets both links to NULL, so non-NULL means "in use" */
+ if (user->list.next || user->list.prev) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* conn->hchan is NULL after l2cap_conn_del() was called */
+ if (!conn->hchan) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ ret = user->probe(conn, user);
+ if (ret)
+ goto out_unlock;
+
+ list_add(&user->list, &conn->users);
+ ret = 0;
+
+out_unlock:
+ hci_dev_unlock(hdev);
+ return ret;
+}
+EXPORT_SYMBOL(l2cap_register_user);
+
+/*
+ * Unregister \user from \conn and call its ->remove callback.
+ * This is a no-op if \user is not currently linked (either link is NULL), so
+ * calling it on an already unregistered user does not invoke ->remove again.
+ * The hci_dev lock is held across the ->remove callback.
+ */
+void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user)
+{
+ struct hci_dev *hdev = conn->hcon->hdev;
+
+ hci_dev_lock(hdev);
+
+ if (!user->list.next || !user->list.prev)
+ goto out_unlock;
+
+ list_del(&user->list);
+ /* NULL links mark the user as unregistered for the checks above */
+ user->list.next = NULL;
+ user->list.prev = NULL;
+ user->remove(conn, user);
+
+out_unlock:
+ hci_dev_unlock(hdev);
+}
+EXPORT_SYMBOL(l2cap_unregister_user);
+
+/*
+ * Unlink every user still registered on \conn and call its ->remove callback.
+ * Takes no lock itself.
+ * NOTE(review): presumably the caller (l2cap_conn_del()) already runs with the
+ * hci_dev locked, as the registration path assumes -- confirm.
+ */
+static void l2cap_unregister_all_users(struct l2cap_conn *conn)
+{
+ struct l2cap_user *user;
+
+ while (!list_empty(&conn->users)) {
+ user = list_first_entry(&conn->users, struct l2cap_user, list);
+ list_del(&user->list);
+ /* same "unregistered" marker as in l2cap_unregister_user() */
+ user->list.next = NULL;
+ user->list.prev = NULL;
+ user->remove(conn, user);
+ }
+}
+
static void l2cap_conn_del(struct hci_conn *hcon, int err)
{
struct l2cap_conn *conn = hcon->l2cap_data;
@@ -1458,6 +1541,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
kfree_skb(conn->rx_skb);
+ l2cap_unregister_all_users(conn);
+
mutex_lock(&conn->chan_lock);
/* Kill channels */
@@ -1486,7 +1571,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
}
hcon->l2cap_data = NULL;
- kfree(conn);
+ conn->hchan = NULL;
+ l2cap_conn_put(conn);
}
static void security_timeout(struct work_struct *work)
@@ -1502,12 +1588,12 @@ static void security_timeout(struct work_struct *work)
}
}
-static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
+static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
{
struct l2cap_conn *conn = hcon->l2cap_data;
struct hci_chan *hchan;
- if (conn || status)
+ if (conn)
return conn;
hchan = hci_chan_create(hcon);
@@ -1520,8 +1606,10 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
return NULL;
}
+ kref_init(&conn->ref);
hcon->l2cap_data = conn;
conn->hcon = hcon;
+ hci_conn_get(conn->hcon);
conn->hchan = hchan;
BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
@@ -1547,6 +1635,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
mutex_init(&conn->chan_lock);
INIT_LIST_HEAD(&conn->chan_l);
+ INIT_LIST_HEAD(&conn->users);
if (hcon->type == LE_LINK)
INIT_DELAYED_WORK(&conn->security_timer, security_timeout);
@@ -1558,6 +1647,26 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
return conn;
}
+/*
+ * kref release callback for struct l2cap_conn: drops the hci_conn reference
+ * held through conn->hcon and frees the object.
+ */
+static void l2cap_conn_free(struct kref *ref)
+{
+ struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref);
+
+ hci_conn_put(conn->hcon);
+ kfree(conn);
+}
+
+/* Take an additional reference on \conn; release it with l2cap_conn_put(). */
+void l2cap_conn_get(struct l2cap_conn *conn)
+{
+ kref_get(&conn->ref);
+}
+EXPORT_SYMBOL(l2cap_conn_get);
+
+/*
+ * Drop one reference on \conn. When the last reference goes away the object is
+ * released through l2cap_conn_free().
+ */
+void l2cap_conn_put(struct l2cap_conn *conn)
+{
+ kref_put(&conn->ref, l2cap_conn_free);
+}
+EXPORT_SYMBOL(l2cap_conn_put);
+
/* ---- Socket interface ---- */
/* Find socket with psm and source / destination bdaddr.
@@ -1695,9 +1804,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
goto done;
}
- conn = l2cap_conn_add(hcon, 0);
+ conn = l2cap_conn_add(hcon);
if (!conn) {
- hci_conn_put(hcon);
+ hci_conn_drop(hcon);
err = -ENOMEM;
goto done;
}
@@ -1707,7 +1816,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
if (!list_empty(&conn->chan_l)) {
err = -EBUSY;
- hci_conn_put(hcon);
+ hci_conn_drop(hcon);
}
if (err)
@@ -2743,6 +2852,9 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code,
BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %u",
conn, code, ident, dlen);
+ if (conn->mtu < L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE)
+ return NULL;
+
len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen;
count = min_t(unsigned int, conn->mtu, len);
@@ -3568,10 +3680,14 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len)
}
static inline int l2cap_command_rej(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+ u8 *data)
{
struct l2cap_cmd_rej_unk *rej = (struct l2cap_cmd_rej_unk *) data;
+ if (cmd_len < sizeof(*rej))
+ return -EPROTO;
+
if (rej->reason != L2CAP_REJ_NOT_UNDERSTOOD)
return 0;
@@ -3720,11 +3836,14 @@ sendresp:
}
static int l2cap_connect_req(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
{
struct hci_dev *hdev = conn->hcon->hdev;
struct hci_conn *hcon = conn->hcon;
+ if (cmd_len < sizeof(struct l2cap_conn_req))
+ return -EPROTO;
+
hci_dev_lock(hdev);
if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
@@ -3738,7 +3857,8 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
}
static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+ u8 *data)
{
struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data;
u16 scid, dcid, result, status;
@@ -3746,6 +3866,9 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
u8 req[128];
int err;
+ if (cmd_len < sizeof(*rsp))
+ return -EPROTO;
+
scid = __le16_to_cpu(rsp->scid);
dcid = __le16_to_cpu(rsp->dcid);
result = __le16_to_cpu(rsp->result);
@@ -3843,6 +3966,9 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
struct l2cap_chan *chan;
int len, err = 0;
+ if (cmd_len < sizeof(*req))
+ return -EPROTO;
+
dcid = __le16_to_cpu(req->dcid);
flags = __le16_to_cpu(req->flags);
@@ -3866,7 +3992,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
/* Reject if config buffer is too small. */
len = cmd_len - sizeof(*req);
- if (len < 0 || chan->conf_len + len > sizeof(chan->conf_req)) {
+ if (chan->conf_len + len > sizeof(chan->conf_req)) {
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP,
l2cap_build_conf_rsp(chan, rsp,
L2CAP_CONF_REJECT, flags), rsp);
@@ -3944,14 +4070,18 @@ unlock:
}
static inline int l2cap_config_rsp(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+ u8 *data)
{
struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data;
u16 scid, flags, result;
struct l2cap_chan *chan;
- int len = le16_to_cpu(cmd->len) - sizeof(*rsp);
+ int len = cmd_len - sizeof(*rsp);
int err = 0;
+ if (cmd_len < sizeof(*rsp))
+ return -EPROTO;
+
scid = __le16_to_cpu(rsp->scid);
flags = __le16_to_cpu(rsp->flags);
result = __le16_to_cpu(rsp->result);
@@ -4052,7 +4182,8 @@ done:
}
static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+ u8 *data)
{
struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data;
struct l2cap_disconn_rsp rsp;
@@ -4060,6 +4191,9 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
struct l2cap_chan *chan;
struct sock *sk;
+ if (cmd_len != sizeof(*req))
+ return -EPROTO;
+
scid = __le16_to_cpu(req->scid);
dcid = __le16_to_cpu(req->dcid);
@@ -4099,12 +4233,16 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
}
static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+ u8 *data)
{
struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data;
u16 dcid, scid;
struct l2cap_chan *chan;
+ if (cmd_len != sizeof(*rsp))
+ return -EPROTO;
+
scid = __le16_to_cpu(rsp->scid);
dcid = __le16_to_cpu(rsp->dcid);
@@ -4134,11 +4272,15 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
}
static inline int l2cap_information_req(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+ u8 *data)
{
struct l2cap_info_req *req = (struct l2cap_info_req *) data;
u16 type;
+ if (cmd_len != sizeof(*req))
+ return -EPROTO;
+
type = __le16_to_cpu(req->type);
BT_DBG("type 0x%4.4x", type);
@@ -4185,11 +4327,15 @@ static inline int l2cap_information_req(struct l2cap_conn *conn,
}
static inline int l2cap_information_rsp(struct l2cap_conn *conn,
- struct l2cap_cmd_hdr *cmd, u8 *data)
+ struct l2cap_cmd_hdr *cmd, u16 cmd_len,
+ u8 *data)
{
struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data;
u16 type, result;
+ if (cmd_len < sizeof(*rsp))
+ return -EPROTO;
+
type = __le16_to_cpu(rsp->type);
result = __le16_to_cpu(rsp->result);
@@ -5055,16 +5201,16 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
switch (cmd->code) {
case L2CAP_COMMAND_REJ:
- l2cap_command_rej(conn, cmd, data);
+ l2cap_command_rej(conn, cmd, cmd_len, data);
break;
case L2CAP_CONN_REQ:
- err = l2cap_connect_req(conn, cmd, data);
+ err = l2cap_connect_req(conn, cmd, cmd_len, data);
break;
case L2CAP_CONN_RSP:
case L2CAP_CREATE_CHAN_RSP:
- err = l2cap_connect_create_rsp(conn, cmd, data);
+ err = l2cap_connect_create_rsp(conn, cmd, cmd_len, data);
break;
case L2CAP_CONF_REQ:
@@ -5072,15 +5218,15 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
break;
case L2CAP_CONF_RSP:
- err = l2cap_config_rsp(conn, cmd, data);
+ err = l2cap_config_rsp(conn, cmd, cmd_len, data);
break;
case L2CAP_DISCONN_REQ:
- err = l2cap_disconnect_req(conn, cmd, data);
+ err = l2cap_disconnect_req(conn, cmd, cmd_len, data);
break;
case L2CAP_DISCONN_RSP:
- err = l2cap_disconnect_rsp(conn, cmd, data);
+ err = l2cap_disconnect_rsp(conn, cmd, cmd_len, data);
break;
case L2CAP_ECHO_REQ:
@@ -5091,11 +5237,11 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
break;
case L2CAP_INFO_REQ:
- err = l2cap_information_req(conn, cmd, data);
+ err = l2cap_information_req(conn, cmd, cmd_len, data);
break;
case L2CAP_INFO_RSP:
- err = l2cap_information_rsp(conn, cmd, data);
+ err = l2cap_information_rsp(conn, cmd, cmd_len, data);
break;
case L2CAP_CREATE_CHAN_REQ:
@@ -6205,12 +6351,13 @@ drop:
kfree_skb(skb);
}
-static void l2cap_att_channel(struct l2cap_conn *conn, u16 cid,
+static void l2cap_att_channel(struct l2cap_conn *conn,
struct sk_buff *skb)
{
struct l2cap_chan *chan;
- chan = l2cap_global_chan_by_scid(0, cid, conn->src, conn->dst);
+ chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA,
+ conn->src, conn->dst);
if (!chan)
goto drop;
@@ -6259,7 +6406,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
break;
case L2CAP_CID_LE_DATA:
- l2cap_att_channel(conn, cid, skb);
+ l2cap_att_channel(conn, skb);
break;
case L2CAP_CID_SMP:
@@ -6313,7 +6460,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
if (!status) {
- conn = l2cap_conn_add(hcon, status);
+ conn = l2cap_conn_add(hcon);
if (conn)
l2cap_conn_ready(conn);
} else {
@@ -6482,7 +6629,7 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
goto drop;
if (!conn)
- conn = l2cap_conn_add(hcon, 0);
+ conn = l2cap_conn_add(hcon);
if (!conn)
goto drop;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bcfb8422fdc..36fed40c162c 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -43,6 +43,12 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
int proto, gfp_t prio);
+/*
+ * Returns true if \sock is non-NULL and uses the L2CAP socket operations,
+ * i.e. it is safe to treat its sk as an L2CAP socket.
+ */
+bool l2cap_is_socket(struct socket *sock)
+{
+ return sock && sock->ops == &l2cap_sock_ops;
+}
+EXPORT_SYMBOL(l2cap_is_socket);
+
static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
{
struct sock *sk = sock->sk;
@@ -1292,7 +1298,7 @@ int __init l2cap_init_sockets(void)
goto error;
}
- err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list,
+ err = bt_procfs_init(&init_net, "l2cap", &l2cap_sk_list,
NULL);
if (err < 0) {
BT_ERR("Failed to create L2CAP proc file");
@@ -1312,8 +1318,6 @@ error:
void l2cap_cleanup_sockets(void)
{
bt_procfs_cleanup(&init_net, "l2cap");
- if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
- BT_ERR("L2CAP socket unregistration failed");
-
+ bt_sock_unregister(BTPROTO_L2CAP);
proto_unregister(&l2cap_proto);
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 39395c7144aa..f8ecbc70293d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -106,11 +106,10 @@ static const u16 mgmt_events[] = {
* These LE scan and inquiry parameters were chosen according to LE General
* Discovery Procedure specification.
*/
-#define LE_SCAN_TYPE 0x01
#define LE_SCAN_WIN 0x12
#define LE_SCAN_INT 0x12
-#define LE_SCAN_TIMEOUT_LE_ONLY 10240 /* TGAP(gen_disc_scan_min) */
-#define LE_SCAN_TIMEOUT_BREDR_LE 5120 /* TGAP(100)/2 */
+#define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240)
+#define LE_SCAN_TIMEOUT_BREDR_LE msecs_to_jiffies(5120)
#define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */
#define INQUIRY_LEN_BREDR_LE 0x04 /* TGAP(100)/2 */
@@ -384,7 +383,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_bredr_capable(hdev)) {
settings |= MGMT_SETTING_CONNECTABLE;
- settings |= MGMT_SETTING_FAST_CONNECTABLE;
+ if (hdev->hci_ver >= BLUETOOTH_VER_1_2)
+ settings |= MGMT_SETTING_FAST_CONNECTABLE;
settings |= MGMT_SETTING_DISCOVERABLE;
settings |= MGMT_SETTING_BREDR;
settings |= MGMT_SETTING_LINK_SECURITY;
@@ -409,6 +409,9 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
settings |= MGMT_SETTING_CONNECTABLE;
+ if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+ settings |= MGMT_SETTING_FAST_CONNECTABLE;
+
if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
settings |= MGMT_SETTING_DISCOVERABLE;
@@ -591,32 +594,33 @@ static void create_eir(struct hci_dev *hdev, u8 *data)
ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
}
-static int update_eir(struct hci_dev *hdev)
+static void update_eir(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_write_eir cp;
if (!hdev_is_powered(hdev))
- return 0;
+ return;
if (!lmp_ext_inq_capable(hdev))
- return 0;
+ return;
if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
- return 0;
+ return;
if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
- return 0;
+ return;
memset(&cp, 0, sizeof(cp));
create_eir(hdev, cp.data);
if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
- return 0;
+ return;
memcpy(hdev->eir, cp.data, sizeof(cp.data));
- return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
}
static u8 get_service_classes(struct hci_dev *hdev)
@@ -630,47 +634,48 @@ static u8 get_service_classes(struct hci_dev *hdev)
return val;
}
-static int update_class(struct hci_dev *hdev)
+static void update_class(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
u8 cod[3];
- int err;
BT_DBG("%s", hdev->name);
if (!hdev_is_powered(hdev))
- return 0;
+ return;
if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
- return 0;
+ return;
cod[0] = hdev->minor_class;
cod[1] = hdev->major_class;
cod[2] = get_service_classes(hdev);
if (memcmp(cod, hdev->dev_class, 3) == 0)
- return 0;
-
- err = hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
- if (err == 0)
- set_bit(HCI_PENDING_CLASS, &hdev->dev_flags);
+ return;
- return err;
+ hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
}
static void service_cache_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
service_cache.work);
+ struct hci_request req;
if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
return;
+ hci_req_init(&req, hdev);
+
hci_dev_lock(hdev);
- update_eir(hdev);
- update_class(hdev);
+ update_eir(&req);
+ update_class(&req);
hci_dev_unlock(hdev);
+
+ hci_req_run(&req, NULL);
}
static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
@@ -994,11 +999,64 @@ failed:
return err;
}
+static void write_fast_connectable(struct hci_request *req, bool enable)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_page_scan_activity acp;
+ u8 type;
+
+ if (hdev->hci_ver < BLUETOOTH_VER_1_2)
+ return;
+
+ if (enable) {
+ type = PAGE_SCAN_TYPE_INTERLACED;
+
+ /* 160 msec page scan interval */
+ acp.interval = __constant_cpu_to_le16(0x0100);
+ } else {
+ type = PAGE_SCAN_TYPE_STANDARD; /* default */
+
+ /* default 1.28 sec page scan */
+ acp.interval = __constant_cpu_to_le16(0x0800);
+ }
+
+ acp.window = __constant_cpu_to_le16(0x0012);
+
+ if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
+ __cpu_to_le16(hdev->page_scan_window) != acp.window)
+ hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY,
+ sizeof(acp), &acp);
+
+ if (hdev->page_scan_type != type)
+ hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
+}
+
+static void set_connectable_complete(struct hci_dev *hdev, u8 status)
+{
+ struct pending_cmd *cmd;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
+ if (!cmd)
+ goto unlock;
+
+ send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev);
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_mode *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
u8 scan;
int err;
@@ -1065,7 +1123,20 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
cancel_delayed_work(&hdev->discov_off);
}
- err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ hci_req_init(&req, hdev);
+
+ hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+
+ /* If we're going from non-connectable to connectable or
+ * vice-versa when fast connectable is enabled ensure that fast
+ * connectable gets disabled. write_fast_connectable won't do
+ * anything if the page scan parameters are already what they
+ * should be.
+ */
+ if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+ write_fast_connectable(&req, false);
+
+ err = hci_req_run(&req, set_connectable_complete);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -1280,6 +1351,11 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
return cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
MGMT_STATUS_INVALID_PARAMS);
+ /* LE-only devices do not allow toggling LE on/off */
+ if (!lmp_bredr_capable(hdev))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
+ MGMT_STATUS_REJECTED);
+
hci_dev_lock(hdev);
val = !!cp->val;
@@ -1332,6 +1408,29 @@ unlock:
return err;
}
+/* This is a helper function to test for pending mgmt commands that can
+ * cause CoD or EIR HCI commands. We can only allow one such pending
+ * mgmt command at a time since otherwise we cannot easily track what
+ * the current values are, will be, and based on that calculate if a new
+ * HCI command needs to be sent and if yes with what value.
+ */
+static bool pending_eir_or_class(struct hci_dev *hdev)
+{
+ struct pending_cmd *cmd;
+
+ list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+ switch (cmd->opcode) {
+ case MGMT_OP_ADD_UUID:
+ case MGMT_OP_REMOVE_UUID:
+ case MGMT_OP_SET_DEV_CLASS:
+ case MGMT_OP_SET_POWERED:
+ return true;
+ }
+ }
+
+ return false;
+}
+
static const u8 bluetooth_base_uuid[] = {
0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80,
0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -1351,10 +1450,37 @@ static u8 get_uuid_size(const u8 *uuid)
return 16;
}
+static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status)
+{
+ struct pending_cmd *cmd;
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(mgmt_op, hdev);
+ if (!cmd)
+ goto unlock;
+
+ cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status),
+ hdev->dev_class, 3);
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void add_uuid_complete(struct hci_dev *hdev, u8 status)
+{
+ BT_DBG("status 0x%02x", status);
+
+ mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status);
+}
+
static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
struct mgmt_cp_add_uuid *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
struct bt_uuid *uuid;
int err;
@@ -1362,7 +1488,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_lock(hdev);
- if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
+ if (pending_eir_or_class(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID,
MGMT_STATUS_BUSY);
goto failed;
@@ -1380,23 +1506,28 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
list_add_tail(&uuid->list, &hdev->uuids);
- err = update_class(hdev);
- if (err < 0)
- goto failed;
+ hci_req_init(&req, hdev);
- err = update_eir(hdev);
- if (err < 0)
- goto failed;
+ update_class(&req);
+ update_eir(&req);
+
+ err = hci_req_run(&req, add_uuid_complete);
+ if (err < 0) {
+ if (err != -ENODATA)
+ goto failed;
- if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0,
hdev->dev_class, 3);
goto failed;
}
cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len);
- if (!cmd)
+ if (!cmd) {
err = -ENOMEM;
+ goto failed;
+ }
+
+ err = 0;
failed:
hci_dev_unlock(hdev);
@@ -1417,6 +1548,13 @@ static bool enable_service_cache(struct hci_dev *hdev)
return false;
}
+static void remove_uuid_complete(struct hci_dev *hdev, u8 status)
+{
+ BT_DBG("status 0x%02x", status);
+
+ mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status);
+}
+
static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -1424,13 +1562,14 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
struct pending_cmd *cmd;
struct bt_uuid *match, *tmp;
u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ struct hci_request req;
int err, found;
BT_DBG("request for %s", hdev->name);
hci_dev_lock(hdev);
- if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
+ if (pending_eir_or_class(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID,
MGMT_STATUS_BUSY);
goto unlock;
@@ -1466,34 +1605,47 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
}
update_class:
- err = update_class(hdev);
- if (err < 0)
- goto unlock;
+ hci_req_init(&req, hdev);
- err = update_eir(hdev);
- if (err < 0)
- goto unlock;
+ update_class(&req);
+ update_eir(&req);
+
+ err = hci_req_run(&req, remove_uuid_complete);
+ if (err < 0) {
+ if (err != -ENODATA)
+ goto unlock;
- if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0,
hdev->dev_class, 3);
goto unlock;
}
cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len);
- if (!cmd)
+ if (!cmd) {
err = -ENOMEM;
+ goto unlock;
+ }
+
+ err = 0;
unlock:
hci_dev_unlock(hdev);
return err;
}
+static void set_class_complete(struct hci_dev *hdev, u8 status)
+{
+ BT_DBG("status 0x%02x", status);
+
+ mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status);
+}
+
static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_set_dev_class *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
int err;
BT_DBG("request for %s", hdev->name);
@@ -1502,15 +1654,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
MGMT_STATUS_NOT_SUPPORTED);
- if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags))
- return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
- MGMT_STATUS_BUSY);
+ hci_dev_lock(hdev);
- if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0)
- return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
- MGMT_STATUS_INVALID_PARAMS);
+ if (pending_eir_or_class(hdev)) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
+ MGMT_STATUS_BUSY);
+ goto unlock;
+ }
- hci_dev_lock(hdev);
+ if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
+ MGMT_STATUS_INVALID_PARAMS);
+ goto unlock;
+ }
hdev->major_class = cp->major;
hdev->minor_class = cp->minor;
@@ -1521,26 +1677,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
+ hci_req_init(&req, hdev);
+
if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) {
hci_dev_unlock(hdev);
cancel_delayed_work_sync(&hdev->service_cache);
hci_dev_lock(hdev);
- update_eir(hdev);
+ update_eir(&req);
}
- err = update_class(hdev);
- if (err < 0)
- goto unlock;
+ update_class(&req);
+
+ err = hci_req_run(&req, set_class_complete);
+ if (err < 0) {
+ if (err != -ENODATA)
+ goto unlock;
- if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0,
hdev->dev_class, 3);
goto unlock;
}
cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len);
- if (!cmd)
+ if (!cmd) {
err = -ENOMEM;
+ goto unlock;
+ }
+
+ err = 0;
unlock:
hci_dev_unlock(hdev);
@@ -1971,7 +2135,7 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)
conn->security_cfm_cb = NULL;
conn->disconn_cfm_cb = NULL;
- hci_conn_put(conn);
+ hci_conn_drop(conn);
mgmt_pending_remove(cmd);
}
@@ -2062,7 +2226,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
}
if (conn->connect_cfm_cb) {
- hci_conn_put(conn);
+ hci_conn_drop(conn);
err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE,
MGMT_STATUS_BUSY, &rp, sizeof(rp));
goto unlock;
@@ -2071,7 +2235,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
- hci_conn_put(conn);
+ hci_conn_drop(conn);
goto unlock;
}
@@ -2140,7 +2304,7 @@ unlock:
}
static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
- bdaddr_t *bdaddr, u8 type, u16 mgmt_op,
+ struct mgmt_addr_info *addr, u16 mgmt_op,
u16 hci_op, __le32 passkey)
{
struct pending_cmd *cmd;
@@ -2150,37 +2314,41 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
if (!hdev_is_powered(hdev)) {
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_NOT_POWERED);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_NOT_POWERED, addr,
+ sizeof(*addr));
goto done;
}
- if (type == BDADDR_BREDR)
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr);
+ if (addr->type == BDADDR_BREDR)
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr);
else
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr);
+ conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr);
if (!conn) {
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_NOT_CONNECTED);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_NOT_CONNECTED, addr,
+ sizeof(*addr));
goto done;
}
- if (type == BDADDR_LE_PUBLIC || type == BDADDR_LE_RANDOM) {
+ if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
/* Continue with pairing via SMP */
err = smp_user_confirm_reply(conn, mgmt_op, passkey);
if (!err)
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_SUCCESS);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_SUCCESS, addr,
+ sizeof(*addr));
else
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_FAILED);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_FAILED, addr,
+ sizeof(*addr));
goto done;
}
- cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr));
+ cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr));
if (!cmd) {
err = -ENOMEM;
goto done;
@@ -2190,11 +2358,12 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
if (hci_op == HCI_OP_USER_PASSKEY_REPLY) {
struct hci_cp_user_passkey_reply cp;
- bacpy(&cp.bdaddr, bdaddr);
+ bacpy(&cp.bdaddr, &addr->bdaddr);
cp.passkey = passkey;
err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp);
} else
- err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr);
+ err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr),
+ &addr->bdaddr);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -2211,7 +2380,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_PIN_CODE_NEG_REPLY,
HCI_OP_PIN_CODE_NEG_REPLY, 0);
}
@@ -2227,7 +2396,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data,
return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY,
MGMT_STATUS_INVALID_PARAMS);
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_CONFIRM_REPLY,
HCI_OP_USER_CONFIRM_REPLY, 0);
}
@@ -2239,7 +2408,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_CONFIRM_NEG_REPLY,
HCI_OP_USER_CONFIRM_NEG_REPLY, 0);
}
@@ -2251,7 +2420,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_PASSKEY_REPLY,
HCI_OP_USER_PASSKEY_REPLY, cp->passkey);
}
@@ -2263,18 +2432,47 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_PASSKEY_NEG_REPLY,
HCI_OP_USER_PASSKEY_NEG_REPLY, 0);
}
-static int update_name(struct hci_dev *hdev, const char *name)
+static void update_name(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_write_local_name cp;
- memcpy(cp.name, name, sizeof(cp.name));
+ memcpy(cp.name, hdev->dev_name, sizeof(cp.name));
+
+ hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
+}
+
+static void set_name_complete(struct hci_dev *hdev, u8 status)
+{
+ struct mgmt_cp_set_local_name *cp;
+ struct pending_cmd *cmd;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
+ if (!cmd)
+ goto unlock;
+
+ cp = cmd->param;
- return hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
+ if (status)
+ cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
+ mgmt_status(status));
+ else
+ cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
+ cp, sizeof(*cp));
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
}
static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -2282,12 +2480,24 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_set_local_name *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
int err;
BT_DBG("");
hci_dev_lock(hdev);
+ /* If the old values are the same as the new ones just return a
+ * direct command complete event.
+ */
+ if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) &&
+ !memcmp(hdev->short_name, cp->short_name,
+ sizeof(hdev->short_name))) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
+ data, len);
+ goto failed;
+ }
+
memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name));
if (!hdev_is_powered(hdev)) {
@@ -2310,7 +2520,19 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- err = update_name(hdev, cp->name);
+ memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name));
+
+ hci_req_init(&req, hdev);
+
+ if (lmp_bredr_capable(hdev)) {
+ update_name(&req);
+ update_eir(&req);
+ }
+
+ if (lmp_le_capable(hdev))
+ hci_update_ad(&req);
+
+ err = hci_req_run(&req, set_name_complete);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -2478,14 +2700,14 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
break;
case DISCOV_TYPE_LE:
- if (!lmp_host_le_capable(hdev)) {
+ if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
MGMT_STATUS_NOT_SUPPORTED);
mgmt_pending_remove(cmd);
goto failed;
}
- err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT,
+ err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT,
LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY);
break;
@@ -2497,8 +2719,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
- err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN,
- LE_SCAN_TIMEOUT_BREDR_LE);
+ err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT,
+ LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE);
break;
default:
@@ -2698,6 +2920,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_set_device_id *cp = data;
+ struct hci_request req;
int err;
__u16 source;
@@ -2718,24 +2941,59 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0);
- update_eir(hdev);
+ hci_req_init(&req, hdev);
+ update_eir(&req);
+ hci_req_run(&req, NULL);
hci_dev_unlock(hdev);
return err;
}
+static void fast_connectable_complete(struct hci_dev *hdev, u8 status)
+{
+ struct pending_cmd *cmd;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev);
+ if (!cmd)
+ goto unlock;
+
+ if (status) {
+ cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
+ mgmt_status(status));
+ } else {
+ struct mgmt_mode *cp = cmd->param;
+
+ if (cp->val)
+ set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+ else
+ clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+
+ send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev);
+ new_settings(hdev, cmd->sk);
+ }
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
struct mgmt_mode *cp = data;
- struct hci_cp_write_page_scan_activity acp;
- u8 type;
+ struct pending_cmd *cmd;
+ struct hci_request req;
int err;
BT_DBG("%s", hdev->name);
- if (!lmp_bredr_capable(hdev))
+ if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2)
return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
MGMT_STATUS_NOT_SUPPORTED);
@@ -2753,40 +3011,39 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- if (cp->val) {
- type = PAGE_SCAN_TYPE_INTERLACED;
+ if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
+ MGMT_STATUS_BUSY);
+ goto unlock;
+ }
- /* 160 msec page scan interval */
- acp.interval = __constant_cpu_to_le16(0x0100);
- } else {
- type = PAGE_SCAN_TYPE_STANDARD; /* default */
+ if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) {
+ err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE,
+ hdev);
+ goto unlock;
+ }
- /* default 1.28 sec page scan */
- acp.interval = __constant_cpu_to_le16(0x0800);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev,
+ data, len);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto unlock;
}
- /* default 11.25 msec page scan window */
- acp.window = __constant_cpu_to_le16(0x0012);
+ hci_req_init(&req, hdev);
- err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(acp),
- &acp);
- if (err < 0) {
- err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
- MGMT_STATUS_FAILED);
- goto done;
- }
+ write_fast_connectable(&req, cp->val);
- err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
+ err = hci_req_run(&req, fast_connectable_complete);
if (err < 0) {
err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
MGMT_STATUS_FAILED);
- goto done;
+ mgmt_pending_remove(cmd);
}
- err = cmd_complete(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 0,
- NULL, 0);
-done:
+unlock:
hci_dev_unlock(hdev);
+
return err;
}
@@ -3043,79 +3300,116 @@ static void settings_rsp(struct pending_cmd *cmd, void *data)
mgmt_pending_free(cmd);
}
-static int set_bredr_scan(struct hci_dev *hdev)
+static void set_bredr_scan(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
u8 scan = 0;
+ /* Ensure that fast connectable is disabled. This function will
+ * not do anything if the page scan parameters are already what
+ * they should be.
+ */
+ write_fast_connectable(req, false);
+
if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
scan |= SCAN_PAGE;
if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
scan |= SCAN_INQUIRY;
- if (!scan)
- return 0;
-
- return hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ if (scan)
+ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
-int mgmt_powered(struct hci_dev *hdev, u8 powered)
+static void powered_complete(struct hci_dev *hdev, u8 status)
{
struct cmd_lookup match = { NULL, hdev };
- int err;
- if (!test_bit(HCI_MGMT, &hdev->dev_flags))
- return 0;
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
- if (powered) {
- u8 link_sec;
+ new_settings(hdev, match.sk);
- if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
- !lmp_host_ssp_capable(hdev)) {
- u8 ssp = 1;
+ hci_dev_unlock(hdev);
- hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
- }
+ if (match.sk)
+ sock_put(match.sk);
+}
- if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
- struct hci_cp_write_le_host_supported cp;
+static int powered_update_hci(struct hci_dev *hdev)
+{
+ struct hci_request req;
+ u8 link_sec;
- cp.le = 1;
- cp.simul = lmp_le_br_capable(hdev);
+ hci_req_init(&req, hdev);
- /* Check first if we already have the right
- * host state (host features set)
- */
- if (cp.le != lmp_host_le_capable(hdev) ||
- cp.simul != lmp_host_le_br_capable(hdev))
- hci_send_cmd(hdev,
- HCI_OP_WRITE_LE_HOST_SUPPORTED,
- sizeof(cp), &cp);
- }
+ if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
+ !lmp_host_ssp_capable(hdev)) {
+ u8 ssp = 1;
- link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
- if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
- hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE,
- sizeof(link_sec), &link_sec);
+ hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
+ }
- if (lmp_bredr_capable(hdev)) {
- set_bredr_scan(hdev);
- update_class(hdev);
- update_name(hdev, hdev->dev_name);
- update_eir(hdev);
- }
- } else {
- u8 status = MGMT_STATUS_NOT_POWERED;
- u8 zero_cod[] = { 0, 0, 0 };
+ if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
+ lmp_bredr_capable(hdev)) {
+ struct hci_cp_write_le_host_supported cp;
+
+ cp.le = 1;
+ cp.simul = lmp_le_br_capable(hdev);
+
+ /* Check first if we already have the right
+ * host state (host features set)
+ */
+ if (cp.le != lmp_host_le_capable(hdev) ||
+ cp.simul != lmp_host_le_br_capable(hdev))
+ hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED,
+ sizeof(cp), &cp);
+ }
+
+ link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
+ if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
+ hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE,
+ sizeof(link_sec), &link_sec);
+
+ if (lmp_bredr_capable(hdev)) {
+ set_bredr_scan(&req);
+ update_class(&req);
+ update_name(&req);
+ update_eir(&req);
+ }
+
+ return hci_req_run(&req, powered_complete);
+}
+
+int mgmt_powered(struct hci_dev *hdev, u8 powered)
+{
+ struct cmd_lookup match = { NULL, hdev };
+ u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
+ u8 zero_cod[] = { 0, 0, 0 };
+ int err;
- mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status);
+ if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+ return 0;
+
+ if (powered) {
+ if (powered_update_hci(hdev) == 0)
+ return 0;
- if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
- mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
- zero_cod, sizeof(zero_cod), NULL);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp,
+ &match);
+ goto new_settings;
}
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status_not_powered);
+
+ if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
+ mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
+ zero_cod, sizeof(zero_cod), NULL);
+
+new_settings:
err = new_settings(hdev, match.sk);
if (match.sk)
@@ -3124,6 +3418,27 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
return err;
}
+int mgmt_set_powered_failed(struct hci_dev *hdev, int err)
+{
+ struct pending_cmd *cmd;
+ u8 status;
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev);
+ if (!cmd)
+ return -ENOENT;
+
+ if (err == -ERFKILL)
+ status = MGMT_STATUS_RFKILLED;
+ else
+ status = MGMT_STATUS_FAILED;
+
+ err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_POWERED, status);
+
+ mgmt_pending_remove(cmd);
+
+ return err;
+}
+
int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
{
struct cmd_lookup match = { NULL, hdev };
@@ -3152,7 +3467,7 @@ int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
{
- struct cmd_lookup match = { NULL, hdev };
+ struct pending_cmd *cmd;
bool changed = false;
int err = 0;
@@ -3164,14 +3479,10 @@ int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
changed = true;
}
- mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp,
- &match);
+ cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
if (changed)
- err = new_settings(hdev, match.sk);
-
- if (match.sk)
- sock_put(match.sk);
+ err = new_settings(hdev, cmd ? cmd->sk : NULL);
return err;
}
@@ -3555,23 +3866,25 @@ int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
return err;
}
-static int clear_eir(struct hci_dev *hdev)
+static void clear_eir(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_write_eir cp;
if (!lmp_ext_inq_capable(hdev))
- return 0;
+ return;
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(&cp, 0, sizeof(cp));
- return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
}
int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
{
struct cmd_lookup match = { NULL, hdev };
+ struct hci_request req;
bool changed = false;
int err = 0;
@@ -3604,29 +3917,26 @@ int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
if (match.sk)
sock_put(match.sk);
+ hci_req_init(&req, hdev);
+
if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
- update_eir(hdev);
+ update_eir(&req);
else
- clear_eir(hdev);
+ clear_eir(&req);
+
+ hci_req_run(&req, NULL);
return err;
}
-static void class_rsp(struct pending_cmd *cmd, void *data)
+static void sk_lookup(struct pending_cmd *cmd, void *data)
{
struct cmd_lookup *match = data;
- cmd_complete(cmd->sk, cmd->index, cmd->opcode, match->mgmt_status,
- match->hdev->dev_class, 3);
-
- list_del(&cmd->list);
-
if (match->sk == NULL) {
match->sk = cmd->sk;
sock_hold(match->sk);
}
-
- mgmt_pending_free(cmd);
}
int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
@@ -3635,11 +3945,9 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
int err = 0;
- clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags);
-
- mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, class_rsp, &match);
- mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, class_rsp, &match);
- mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, class_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
if (!status)
err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
@@ -3653,55 +3961,29 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
{
- struct pending_cmd *cmd;
struct mgmt_cp_set_local_name ev;
- bool changed = false;
- int err = 0;
+ struct pending_cmd *cmd;
- if (memcmp(name, hdev->dev_name, sizeof(hdev->dev_name)) != 0) {
- memcpy(hdev->dev_name, name, sizeof(hdev->dev_name));
- changed = true;
- }
+ if (status)
+ return 0;
memset(&ev, 0, sizeof(ev));
memcpy(ev.name, name, HCI_MAX_NAME_LENGTH);
memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH);
cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
- if (!cmd)
- goto send_event;
-
- /* Always assume that either the short or the complete name has
- * changed if there was a pending mgmt command */
- changed = true;
+ if (!cmd) {
+ memcpy(hdev->dev_name, name, sizeof(hdev->dev_name));
- if (status) {
- err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
- mgmt_status(status));
- goto failed;
+ /* If this is a HCI command related to powering on the
+ * HCI dev don't send any mgmt signals.
+ */
+ if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
+ return 0;
}
- err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, &ev,
- sizeof(ev));
- if (err < 0)
- goto failed;
-
-send_event:
- if (changed)
- err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev,
- sizeof(ev), cmd ? cmd->sk : NULL);
-
- /* EIR is taken care of separately when powering on the
- * adapter so only update them here if this is a name change
- * unrelated to power on.
- */
- if (!test_bit(HCI_INIT, &hdev->flags))
- update_eir(hdev);
-
-failed:
- if (cmd)
- mgmt_pending_remove(cmd);
- return err;
+ return mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
+ cmd ? cmd->sk : NULL);
}
int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b23e2713fea8..ca957d34b0c8 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
u8 sec_level,
int *err);
static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
-static void rfcomm_session_del(struct rfcomm_session *s);
+static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s);
/* ---- RFCOMM frame parsing macros ---- */
#define __get_dlci(b) ((b & 0xfc) >> 2)
@@ -108,12 +108,6 @@ static void rfcomm_schedule(void)
wake_up_process(rfcomm_thread);
}
-static void rfcomm_session_put(struct rfcomm_session *s)
-{
- if (atomic_dec_and_test(&s->refcnt))
- rfcomm_session_del(s);
-}
-
/* ---- RFCOMM FCS computation ---- */
/* reversed, 8-bit, poly=0x07 */
@@ -249,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)
{
BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout);
- if (!mod_timer(&s->timer, jiffies + timeout))
- rfcomm_session_hold(s);
+ mod_timer(&s->timer, jiffies + timeout);
}
static void rfcomm_session_clear_timer(struct rfcomm_session *s)
{
BT_DBG("session %p state %ld", s, s->state);
- if (del_timer(&s->timer))
- rfcomm_session_put(s);
+ del_timer_sync(&s->timer);
}
/* ---- RFCOMM DLCs ---- */
@@ -336,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d)
{
BT_DBG("dlc %p session %p", d, s);
- rfcomm_session_hold(s);
-
rfcomm_session_clear_timer(s);
rfcomm_dlc_hold(d);
list_add(&d->list, &s->dlcs);
@@ -356,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)
if (list_empty(&s->dlcs))
rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT);
-
- rfcomm_session_put(s);
}
static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci)
@@ -493,12 +481,34 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
int rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
{
- int r;
+ int r = 0;
+ struct rfcomm_dlc *d_list;
+ struct rfcomm_session *s, *s_list;
+
+ BT_DBG("dlc %p state %ld dlci %d err %d", d, d->state, d->dlci, err);
rfcomm_lock();
- r = __rfcomm_dlc_close(d, err);
+ s = d->session;
+ if (!s)
+ goto no_session;
+
+ /* after waiting on the mutex check the session still exists
+ * then check the dlc still exists
+ */
+ list_for_each_entry(s_list, &session_list, list) {
+ if (s_list == s) {
+ list_for_each_entry(d_list, &s->dlcs, list) {
+ if (d_list == d) {
+ r = __rfcomm_dlc_close(d, err);
+ break;
+ }
+ }
+ break;
+ }
+ }
+no_session:
rfcomm_unlock();
return r;
}
@@ -609,7 +619,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
return s;
}
-static void rfcomm_session_del(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s)
{
int state = s->state;
@@ -617,15 +627,14 @@ static void rfcomm_session_del(struct rfcomm_session *s)
list_del(&s->list);
- if (state == BT_CONNECTED)
- rfcomm_send_disc(s, 0);
-
rfcomm_session_clear_timer(s);
sock_release(s->sock);
kfree(s);
if (state != BT_LISTEN)
module_put(THIS_MODULE);
+
+ return NULL;
}
static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
@@ -644,17 +653,16 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
return NULL;
}
-static void rfcomm_session_close(struct rfcomm_session *s, int err)
+static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s,
+ int err)
{
struct rfcomm_dlc *d;
struct list_head *p, *n;
- BT_DBG("session %p state %ld err %d", s, s->state, err);
-
- rfcomm_session_hold(s);
-
s->state = BT_CLOSED;
+ BT_DBG("session %p state %ld err %d", s, s->state, err);
+
/* Close all dlcs */
list_for_each_safe(p, n, &s->dlcs) {
d = list_entry(p, struct rfcomm_dlc, list);
@@ -663,7 +671,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err)
}
rfcomm_session_clear_timer(s);
- rfcomm_session_put(s);
+ return rfcomm_session_del(s);
}
static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
@@ -715,8 +723,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
if (*err == 0 || *err == -EINPROGRESS)
return s;
- rfcomm_session_del(s);
- return NULL;
+ return rfcomm_session_del(s);
failed:
sock_release(sock);
@@ -1105,7 +1112,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
}
/* ---- RFCOMM frame reception ---- */
-static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
{
BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
@@ -1114,7 +1121,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);
if (!d) {
rfcomm_send_dm(s, dlci);
- return 0;
+ return s;
}
switch (d->state) {
@@ -1150,25 +1157,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
break;
case BT_DISCONN:
- /* rfcomm_session_put is called later so don't do
- * anything here otherwise we will mess up the session
- * reference counter:
- *
- * (a) when we are the initiator dlc_unlink will drive
- * the reference counter to 0 (there is no initial put
- * after session_add)
- *
- * (b) when we are not the initiator rfcomm_rx_process
- * will explicitly call put to balance the initial hold
- * done after session add.
- */
+ s = rfcomm_session_close(s, ECONNRESET);
break;
}
}
- return 0;
+ return s;
}
-static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
{
int err = 0;
@@ -1192,13 +1188,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
else
err = ECONNRESET;
- s->state = BT_CLOSED;
- rfcomm_session_close(s, err);
+ s = rfcomm_session_close(s, err);
}
- return 0;
+ return s;
}
-static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s,
+ u8 dlci)
{
int err = 0;
@@ -1227,11 +1223,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
else
err = ECONNRESET;
- s->state = BT_CLOSED;
- rfcomm_session_close(s, err);
+ s = rfcomm_session_close(s, err);
}
-
- return 0;
+ return s;
}
void rfcomm_dlc_accept(struct rfcomm_dlc *d)
@@ -1652,11 +1646,18 @@ drop:
return 0;
}
-static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
+static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
+ struct sk_buff *skb)
{
struct rfcomm_hdr *hdr = (void *) skb->data;
u8 type, dlci, fcs;
+ if (!s) {
+ /* no session, so free socket data */
+ kfree_skb(skb);
+ return s;
+ }
+
dlci = __get_dlci(hdr->addr);
type = __get_type(hdr->ctrl);
@@ -1667,7 +1668,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
if (__check_fcs(skb->data, type, fcs)) {
BT_ERR("bad checksum in packet");
kfree_skb(skb);
- return -EILSEQ;
+ return s;
}
if (__test_ea(hdr->len))
@@ -1683,22 +1684,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
case RFCOMM_DISC:
if (__test_pf(hdr->ctrl))
- rfcomm_recv_disc(s, dlci);
+ s = rfcomm_recv_disc(s, dlci);
break;
case RFCOMM_UA:
if (__test_pf(hdr->ctrl))
- rfcomm_recv_ua(s, dlci);
+ s = rfcomm_recv_ua(s, dlci);
break;
case RFCOMM_DM:
- rfcomm_recv_dm(s, dlci);
+ s = rfcomm_recv_dm(s, dlci);
break;
case RFCOMM_UIH:
- if (dlci)
- return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
-
+ if (dlci) {
+ rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
+ return s;
+ }
rfcomm_recv_mcc(s, skb);
break;
@@ -1707,7 +1709,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
break;
}
kfree_skb(skb);
- return 0;
+ return s;
}
/* ---- Connection and data processing ---- */
@@ -1844,7 +1846,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s)
}
}
-static void rfcomm_process_rx(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
{
struct socket *sock = s->sock;
struct sock *sk = sock->sk;
@@ -1856,17 +1858,15 @@ static void rfcomm_process_rx(struct rfcomm_session *s)
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
if (!skb_linearize(skb))
- rfcomm_recv_frame(s, skb);
+ s = rfcomm_recv_frame(s, skb);
else
kfree_skb(skb);
}
- if (sk->sk_state == BT_CLOSED) {
- if (!s->initiator)
- rfcomm_session_put(s);
+ if (s && (sk->sk_state == BT_CLOSED))
+ s = rfcomm_session_close(s, sk->sk_err);
- rfcomm_session_close(s, sk->sk_err);
- }
+ return s;
}
static void rfcomm_accept_connection(struct rfcomm_session *s)
@@ -1891,8 +1891,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
s = rfcomm_session_add(nsock, BT_OPEN);
if (s) {
- rfcomm_session_hold(s);
-
/* We should adjust MTU on incoming sessions.
* L2CAP MTU minus UIH header and FCS. */
s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu,
@@ -1903,7 +1901,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
sock_release(nsock);
}
-static void rfcomm_check_connection(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s)
{
struct sock *sk = s->sock->sk;
@@ -1921,10 +1919,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s)
break;
case BT_CLOSED:
- s->state = BT_CLOSED;
- rfcomm_session_close(s, sk->sk_err);
+ s = rfcomm_session_close(s, sk->sk_err);
break;
}
+ return s;
}
static void rfcomm_process_sessions(void)
@@ -1940,7 +1938,6 @@ static void rfcomm_process_sessions(void)
if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) {
s->state = BT_DISCONN;
rfcomm_send_disc(s, 0);
- rfcomm_session_put(s);
continue;
}
@@ -1949,21 +1946,18 @@ static void rfcomm_process_sessions(void)
continue;
}
- rfcomm_session_hold(s);
-
switch (s->state) {
case BT_BOUND:
- rfcomm_check_connection(s);
+ s = rfcomm_check_connection(s);
break;
default:
- rfcomm_process_rx(s);
+ s = rfcomm_process_rx(s);
break;
}
- rfcomm_process_dlcs(s);
-
- rfcomm_session_put(s);
+ if (s)
+ rfcomm_process_dlcs(s);
}
rfcomm_unlock();
@@ -2010,10 +2004,11 @@ static int rfcomm_add_listener(bdaddr_t *ba)
/* Add listening session */
s = rfcomm_session_add(sock, BT_LISTEN);
- if (!s)
+ if (!s) {
+ err = -ENOMEM;
goto failed;
+ }
- rfcomm_session_hold(s);
return 0;
failed:
sock_release(sock);
@@ -2071,8 +2066,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
if (!s)
return;
- rfcomm_session_hold(s);
-
list_for_each_safe(p, n, &s->dlcs) {
d = list_entry(p, struct rfcomm_dlc, list);
@@ -2104,8 +2097,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
set_bit(RFCOMM_AUTH_REJECT, &d->flags);
}
- rfcomm_session_put(s);
-
rfcomm_schedule();
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7c9224bcce17..30b3721dc6d7 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1037,7 +1037,7 @@ int __init rfcomm_init_sockets(void)
goto error;
}
- err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL);
+ err = bt_procfs_init(&init_net, "rfcomm", &rfcomm_sk_list, NULL);
if (err < 0) {
BT_ERR("Failed to create RFCOMM proc file");
bt_sock_unregister(BTPROTO_RFCOMM);
@@ -1066,8 +1066,7 @@ void __exit rfcomm_cleanup_sockets(void)
debugfs_remove(rfcomm_sock_debugfs);
- if (bt_sock_unregister(BTPROTO_RFCOMM) < 0)
- BT_ERR("RFCOMM socket layer unregistration failed");
+ bt_sock_unregister(BTPROTO_RFCOMM);
proto_unregister(&rfcomm_proto);
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index fb6192c9812e..e7bd4eea575c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -83,7 +83,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
if (conn)
return conn;
- conn = kzalloc(sizeof(struct sco_conn), GFP_ATOMIC);
+ conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL);
if (!conn)
return NULL;
@@ -185,7 +185,7 @@ static int sco_connect(struct sock *sk)
conn = sco_conn_add(hcon);
if (!conn) {
- hci_conn_put(hcon);
+ hci_conn_drop(hcon);
err = -ENOMEM;
goto done;
}
@@ -353,7 +353,7 @@ static void __sco_sock_close(struct sock *sk)
if (sco_pi(sk)->conn->hcon) {
sk->sk_state = BT_DISCONN;
sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
- hci_conn_put(sco_pi(sk)->conn->hcon);
+ hci_conn_drop(sco_pi(sk)->conn->hcon);
sco_pi(sk)->conn->hcon = NULL;
} else
sco_chan_del(sk, ECONNRESET);
@@ -481,8 +481,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
- int err = 0;
-
+ int err;
BT_DBG("sk %p", sk);
@@ -653,6 +652,42 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
return err;
}
+static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ BT_DBG("conn %p", conn);
+
+ conn->state = BT_CONFIG;
+
+ if (!lmp_esco_capable(hdev)) {
+ struct hci_cp_accept_conn_req cp;
+
+ bacpy(&cp.bdaddr, &conn->dst);
+
+ if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
+ cp.role = 0x00; /* Become master */
+ else
+ cp.role = 0x01; /* Remain slave */
+
+ hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
+ } else {
+ struct hci_cp_accept_sync_conn_req cp;
+
+ bacpy(&cp.bdaddr, &conn->dst);
+ cp.pkt_type = cpu_to_le16(conn->pkt_type);
+
+ cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
+ cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
+ cp.max_latency = __constant_cpu_to_le16(0xffff);
+ cp.content_format = cpu_to_le16(hdev->voice_setting);
+ cp.retrans_effort = 0xff;
+
+ hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
+ sizeof(cp), &cp);
+ }
+}
+
static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t len, int flags)
{
@@ -663,7 +698,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sk->sk_state == BT_CONNECT2 &&
test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
- hci_conn_accept(pi->conn->hcon, 0);
+ sco_conn_defer_accept(pi->conn->hcon, 0);
sk->sk_state = BT_CONFIG;
msg->msg_namelen = 0;
@@ -883,7 +918,7 @@ static void sco_chan_del(struct sock *sk, int err)
sco_conn_unlock(conn);
if (conn->hcon)
- hci_conn_put(conn->hcon);
+ hci_conn_drop(conn->hcon);
}
sk->sk_state = BT_CLOSED;
@@ -1084,7 +1119,7 @@ int __init sco_init(void)
goto error;
}
- err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL);
+ err = bt_procfs_init(&init_net, "sco", &sco_sk_list, NULL);
if (err < 0) {
BT_ERR("Failed to create SCO proc file");
bt_sock_unregister(BTPROTO_SCO);
@@ -1113,8 +1148,7 @@ void __exit sco_exit(void)
debugfs_remove(sco_debugfs);
- if (bt_sock_unregister(BTPROTO_SCO) < 0)
- BT_ERR("SCO socket unregistration failed");
+ bt_sock_unregister(BTPROTO_SCO);
proto_unregister(&sco_proto);
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 5abefb12891d..b5562abdd6e0 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -522,7 +522,7 @@ void smp_chan_destroy(struct l2cap_conn *conn)
kfree(smp);
conn->smp_chan = NULL;
conn->hcon->smp_conn = NULL;
- hci_conn_put(conn->hcon);
+ hci_conn_drop(conn->hcon);
}
int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
@@ -770,7 +770,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level);
- if (!lmp_host_le_capable(hcon->hdev))
+ if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
return 1;
if (sec_level == BT_SECURITY_LOW)
@@ -851,7 +851,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
__u8 reason;
int err = 0;
- if (!lmp_host_le_capable(conn->hcon->hdev)) {
+ if (!test_bit(HCI_LE_ENABLED, &conn->hcon->hdev->dev_flags)) {
err = -ENOTSUPP;
reason = SMP_PAIRING_NOTSUPP;
goto done;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 314c73ed418f..967312803e41 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -348,10 +348,10 @@ void br_dev_setup(struct net_device *dev)
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
- NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX;
+ NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX;
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
- NETIF_F_HW_VLAN_TX;
+ NETIF_F_HW_VLAN_CTAG_TX;
br->dev = dev;
spin_lock_init(&br->lock);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index bab338e6270d..ebfa4443c69b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,9 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
if (!pv)
return;
- for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid);
- vid < BR_VLAN_BITMAP_LEN;
- vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) {
+ for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
f = __br_fdb_get(br, br->dev->dev_addr, vid);
if (f && f->is_local && !f->dst)
fdb_delete(br, f);
@@ -617,6 +615,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
struct net_bridge *br = source->br;
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
+ bool modified = false;
fdb = fdb_find(head, addr, vid);
if (fdb == NULL) {
@@ -626,10 +625,16 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
fdb = fdb_create(head, source, addr, vid);
if (!fdb)
return -ENOMEM;
- fdb_notify(br, fdb, RTM_NEWNEIGH);
+
+ modified = true;
} else {
if (flags & NLM_F_EXCL)
return -EEXIST;
+
+ if (fdb->dst != source) {
+ fdb->dst = source;
+ modified = true;
+ }
}
if (fdb_to_nud(fdb) != state) {
@@ -641,7 +646,12 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
} else
fdb->is_local = fdb->is_static = 0;
- fdb->updated = fdb->used = jiffies;
+ modified = true;
+ }
+
+ fdb->used = jiffies;
+ if (modified) {
+ fdb->updated = jiffies;
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
@@ -724,13 +734,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
* specify a VLAN. To be nice, add/update entry for every
* vlan on this port.
*/
- vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
- while (vid < BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
if (err)
goto out;
- vid = find_next_bit(pv->vlan_bitmap,
- BR_VLAN_BITMAP_LEN, vid+1);
}
}
@@ -815,11 +822,8 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
* vlan on this port.
*/
err = -ENOENT;
- vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
- while (vid < BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
err &= __br_fdb_delete(p, addr, vid);
- vid = find_next_bit(pv->vlan_bitmap,
- BR_VLAN_BITMAP_LEN, vid+1);
}
}
out:
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 459dab22b3f6..4cdba60926ff 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -149,7 +149,6 @@ static void del_nbp(struct net_bridge_port *p)
dev->priv_flags &= ~IFF_BRIDGE_PORT;
netdev_rx_handler_unregister(dev);
- synchronize_net();
netdev_upper_dev_unlink(dev, br->dev);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ee79f3f20383..19942e38fd2d 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -382,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
return ret;
}
-static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct br_mdb_entry *entry;
@@ -458,7 +458,7 @@ unlock:
return err;
}
-static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net_device *dev;
struct br_mdb_entry *entry;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 923fbeaf7afd..d6448e35e027 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -465,8 +465,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
skb_set_transport_header(skb, skb->len);
mldq = (struct mld_msg *) icmp6_hdr(skb);
- interval = ipv6_addr_any(group) ? br->multicast_last_member_interval :
- br->multicast_query_response_interval;
+ interval = ipv6_addr_any(group) ?
+ br->multicast_query_response_interval :
+ br->multicast_last_member_interval;
mldq->mld_type = ICMPV6_MGM_QUERY;
mldq->mld_code = 0;
@@ -1369,7 +1370,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
return -EINVAL;
if (iph->protocol != IPPROTO_IGMP) {
- if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP)
+ if (!ipv4_is_local_multicast(iph->daddr))
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
return 0;
}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index fe43bc7b063f..1ed75bfd8d1d 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -535,7 +535,8 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
return br;
- vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK);
+ vlan = __vlan_find_dev_deep(br, skb->vlan_proto,
+ vlan_tx_tag_get(skb) & VLAN_VID_MASK);
return vlan ? vlan : br;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 299fc5f40a26..8e3abf564798 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -136,10 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
pvid = br_get_pvid(pv);
- for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
- vid < BR_VLAN_BITMAP_LEN;
- vid = find_next_bit(pv->vlan_bitmap,
- BR_VLAN_BITMAP_LEN, vid+1)) {
+ for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
vinfo.vid = vid;
vinfo.flags = 0;
if (vid == pvid)
@@ -355,17 +352,14 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
/* Change state and parameters on port. */
int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
{
- struct ifinfomsg *ifm;
struct nlattr *protinfo;
struct nlattr *afspec;
struct net_bridge_port *p;
struct nlattr *tb[IFLA_BRPORT_MAX + 1];
- int err;
-
- ifm = nlmsg_data(nlh);
+ int err = 0;
- protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
- afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+ protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
+ afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (!protinfo && !afspec)
return 0;
@@ -373,7 +367,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
/* We want to accept dev as bridge itself if the AF_SPEC
* is set to see if someone is setting vlan info on the brigde
*/
- if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec))
+ if (!p && !afspec)
return -EINVAL;
if (p && protinfo) {
@@ -414,14 +408,11 @@ out:
/* Delete port information */
int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
{
- struct ifinfomsg *ifm;
struct nlattr *afspec;
struct net_bridge_port *p;
int err;
- ifm = nlmsg_data(nlh);
-
- afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+ afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (!afspec)
return 0;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b01849a74310..1c0a50f13229 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -225,7 +225,14 @@ static void br_record_config_timeout_values(struct net_bridge *br,
/* called under bridge lock */
void br_transmit_tcn(struct net_bridge *br)
{
- br_send_tcn_bpdu(br_get_port(br, br->root_port));
+ struct net_bridge_port *p;
+
+ p = br_get_port(br, br->root_port);
+ if (p)
+ br_send_tcn_bpdu(p);
+ else
+ br_notice(br, "root port %u not found for topology notice\n",
+ br->root_port);
}
/* called under bridge lock */
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index c3530a81a33b..950663d4d330 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -107,7 +107,7 @@ static void br_tcn_timer_expired(unsigned long arg)
br_debug(br, "tcn timer expired\n");
spin_lock(&br->lock);
- if (br->dev->flags & IFF_UP) {
+ if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) {
br_transmit_tcn(br);
mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 93dde75923f0..bd58b45f5f90 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,6 +34,7 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
{
+ const struct net_device_ops *ops;
struct net_bridge_port *p = NULL;
struct net_bridge *br;
struct net_device *dev;
@@ -53,15 +54,17 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
br = v->parent.br;
dev = br->dev;
}
+ ops = dev->netdev_ops;
- if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) {
+ if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
/* Add VLAN to the device filter if it is supported.
* Stricly speaking, this is not necessary now, since
* devices are made promiscuous by the bridge, but if
* that ever changes this code will allow tagged
* traffic to enter the bridge.
*/
- err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid);
+ err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
+ vid);
if (err)
return err;
}
@@ -82,8 +85,8 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
return 0;
out_filt:
- if (p && (dev->features & NETIF_F_HW_VLAN_FILTER))
- dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid);
+ if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);
return err;
}
@@ -97,9 +100,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
if (v->port_idx && vid) {
struct net_device *dev = v->parent.port->dev;
+ const struct net_device_ops *ops = dev->netdev_ops;
- if (dev->features & NETIF_F_HW_VLAN_FILTER)
- dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid);
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);
}
clear_bit(vid, v->vlan_bitmap);
@@ -171,7 +175,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
* mac header.
*/
skb_push(skb, ETH_HLEN);
- skb = __vlan_put_tag(skb, skb->vlan_tci);
+ skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci);
if (!skb)
goto out;
/* put skb->data back to where it was */
@@ -213,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
/* PVID is set on this port. Any untagged ingress
* frame is considered to belong to this vlan.
*/
- __vlan_hwaccel_put_tag(skb, pvid);
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
return true;
}
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 92de5e5f9db2..19c37a4929bc 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -72,13 +72,17 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
}
static void
-ebt_log_packet(u_int8_t pf, unsigned int hooknum,
- const struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, const struct nf_loginfo *loginfo,
- const char *prefix)
+ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
+ const struct sk_buff *skb, const struct net_device *in,
+ const struct net_device *out, const struct nf_loginfo *loginfo,
+ const char *prefix)
{
unsigned int bitmask;
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
spin_lock_bh(&ebt_log_lock);
printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
'0' + loginfo->u.log.level, prefix,
@@ -176,17 +180,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_log_info *info = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = info->loglevel;
li.u.log.logflags = info->bitmask;
if (info->bitmask & EBT_LOG_NFLOG)
- nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
- par->out, &li, "%s", info->prefix);
+ nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
+ par->in, par->out, &li, "%s", info->prefix);
else
- ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
- par->out, &li, info->prefix);
+ ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in,
+ par->out, &li, info->prefix);
return EBT_CONTINUE;
}
@@ -206,19 +211,47 @@ static struct nf_logger ebt_log_logger __read_mostly = {
.me = THIS_MODULE,
};
+static int __net_init ebt_log_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger);
+ return 0;
+}
+
+static void __net_exit ebt_log_net_fini(struct net *net)
+{
+ nf_log_unset(net, &ebt_log_logger);
+}
+
+static struct pernet_operations ebt_log_net_ops = {
+ .init = ebt_log_net_init,
+ .exit = ebt_log_net_fini,
+};
+
static int __init ebt_log_init(void)
{
int ret;
+ ret = register_pernet_subsys(&ebt_log_net_ops);
+ if (ret < 0)
+ goto err_pernet;
+
ret = xt_register_target(&ebt_log_tg_reg);
if (ret < 0)
- return ret;
+ goto err_target;
+
nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
- return 0;
+
+ return ret;
+
+err_target:
+ unregister_pernet_subsys(&ebt_log_net_ops);
+err_pernet:
+ return ret;
}
static void __exit ebt_log_fini(void)
{
+ unregister_pernet_subsys(&ebt_log_net_ops);
nf_log_unregister(&ebt_log_logger);
xt_unregister_target(&ebt_log_tg_reg);
}
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 5be68bbcc341..59ac7952010d 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nflog_info *info = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
- nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out,
- &li, "%s", info->prefix);
+ nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
+ par->out, &li, "%s", info->prefix);
return EBT_CONTINUE;
}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 3bf43f7bb9d4..df0364aa12d5 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -35,12 +35,13 @@
#include <linux/skbuff.h>
#include <linux/kernel.h>
#include <linux/timer.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netdevice.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ulog.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include "../br_private.h"
@@ -62,13 +63,22 @@ typedef struct {
spinlock_t lock; /* the per-queue lock */
} ebt_ulog_buff_t;
-static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
-static struct sock *ebtulognl;
+static int ebt_ulog_net_id __read_mostly;
+struct ebt_ulog_net {
+ unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
+ ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
+ struct sock *ebtulognl;
+};
+
+static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
+{
+ return net_generic(net, ebt_ulog_net_id);
+}
/* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroup)
+static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
{
- ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup];
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
del_timer(&ub->timer);
@@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
- netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
+ netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
@@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup)
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
- spin_lock_bh(&ulog_buffers[data].lock);
- if (ulog_buffers[data].skb)
- ulog_send(data);
- spin_unlock_bh(&ulog_buffers[data].lock);
+ struct ebt_ulog_net *ebt = container_of((void *)data,
+ struct ebt_ulog_net,
+ nlgroup[*(unsigned int *)data]);
+
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
+ spin_lock_bh(&ub->lock);
+ if (ub->skb)
+ ulog_send(ebt, *(unsigned int *)data);
+ spin_unlock_bh(&ub->lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -116,15 +131,19 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
return skb;
}
-static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- const struct ebt_ulog_info *uloginfo, const char *prefix)
+static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct ebt_ulog_info *uloginfo,
+ const char *prefix)
{
ebt_ulog_packet_msg_t *pm;
size_t size, copy_len;
struct nlmsghdr *nlh;
+ struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
unsigned int group = uloginfo->nlgroup;
- ebt_ulog_buff_t *ub = &ulog_buffers[group];
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
spinlock_t *lock = &ub->lock;
ktime_t kt;
@@ -134,7 +153,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
else
copy_len = uloginfo->cprange;
- size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+ size = nlmsg_total_size(sizeof(*pm) + copy_len);
if (size > nlbufsiz) {
pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
return;
@@ -146,7 +165,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
if (!(ub->skb = ulog_alloc_skb(size)))
goto unlock;
} else if (size > skb_tailroom(ub->skb)) {
- ulog_send(group);
+ ulog_send(ebt, group);
if (!(ub->skb = ulog_alloc_skb(size)))
goto unlock;
@@ -205,7 +224,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
ub->lastnlh = nlh;
if (ub->qlen >= uloginfo->qthreshold)
- ulog_send(group);
+ ulog_send(ebt, group);
else if (!timer_pending(&ub->timer)) {
ub->timer.expires = jiffies + flushtimeout * HZ / 100;
add_timer(&ub->timer);
@@ -216,7 +235,7 @@ unlock:
}
/* this function is registered with the netfilter core */
-static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
+static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
const struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, const struct nf_loginfo *li,
const char *prefix)
@@ -235,13 +254,15 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
static unsigned int
ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- ebt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ struct net *net = dev_net(par->in ? par->in : par->out);
+
+ ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
par->targinfo, NULL);
return EBT_CONTINUE;
}
@@ -277,56 +298,89 @@ static struct nf_logger ebt_ulog_logger __read_mostly = {
.me = THIS_MODULE,
};
-static int __init ebt_ulog_init(void)
+static int __net_init ebt_ulog_net_init(struct net *net)
{
- int ret;
int i;
+ struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
+
struct netlink_kernel_cfg cfg = {
.groups = EBT_ULOG_MAXNLGROUPS,
};
- if (nlbufsiz >= 128*1024) {
- pr_warning("Netlink buffer has to be <= 128kB,"
- " please try a smaller nlbufsiz parameter.\n");
- return -EINVAL;
- }
-
/* initialize ulog_buffers */
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
- spin_lock_init(&ulog_buffers[i].lock);
+ ebt->nlgroup[i] = i;
+ setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
+ (unsigned long)&ebt->nlgroup[i]);
+ spin_lock_init(&ebt->ulog_buffers[i].lock);
}
- ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
- if (!ebtulognl)
- ret = -ENOMEM;
- else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
- netlink_kernel_release(ebtulognl);
-
- if (ret == 0)
- nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
+ ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+ if (!ebt->ebtulognl)
+ return -ENOMEM;
- return ret;
+ nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
+ return 0;
}
-static void __exit ebt_ulog_fini(void)
+static void __net_exit ebt_ulog_net_fini(struct net *net)
{
- ebt_ulog_buff_t *ub;
int i;
+ struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
- nf_log_unregister(&ebt_ulog_logger);
- xt_unregister_target(&ebt_ulog_tg_reg);
+ nf_log_unset(net, &ebt_ulog_logger);
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- ub = &ulog_buffers[i];
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
del_timer(&ub->timer);
- spin_lock_bh(&ub->lock);
+
if (ub->skb) {
kfree_skb(ub->skb);
ub->skb = NULL;
}
- spin_unlock_bh(&ub->lock);
}
- netlink_kernel_release(ebtulognl);
+ netlink_kernel_release(ebt->ebtulognl);
+}
+
+static struct pernet_operations ebt_ulog_net_ops = {
+ .init = ebt_ulog_net_init,
+ .exit = ebt_ulog_net_fini,
+ .id = &ebt_ulog_net_id,
+ .size = sizeof(struct ebt_ulog_net),
+};
+
+static int __init ebt_ulog_init(void)
+{
+ int ret;
+
+ if (nlbufsiz >= 128*1024) {
+ pr_warn("Netlink buffer has to be <= 128kB,"
+ "please try a smaller nlbufsiz parameter.\n");
+ return -EINVAL;
+ }
+
+ ret = register_pernet_subsys(&ebt_ulog_net_ops);
+ if (ret)
+ goto out_pernet;
+
+ ret = xt_register_target(&ebt_ulog_tg_reg);
+ if (ret)
+ goto out_target;
+
+ nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
+
+ return 0;
+
+out_target:
+ unregister_pernet_subsys(&ebt_ulog_net_ops);
+out_pernet:
+ return ret;
+}
+
+static void __exit ebt_ulog_fini(void)
+{
+ nf_log_unregister(&ebt_ulog_logger);
+ xt_unregister_target(&ebt_ulog_tg_reg);
+ unregister_pernet_subsys(&ebt_ulog_net_ops);
}
module_init(ebt_ulog_init);
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 40d8258bf74f..70f656ce0f4a 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
static int __net_init broute_net_init(struct net *net)
{
net->xt.broute_table = ebt_register_table(net, &broute_table);
- if (IS_ERR(net->xt.broute_table))
- return PTR_ERR(net->xt.broute_table);
- return 0;
+ return PTR_RET(net->xt.broute_table);
}
static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 8d493c91a562..3d110c4fc787 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
ethproto = h->h_proto;
if (e->bitmask & EBT_802_3) {
- if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO))
+ if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO))
return 1;
} else if (!(e->bitmask & EBT_NOPROTO) &&
FWINV2(e->ethproto != ethproto, EBT_IPROTO))
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 21760f008974..1f9ece1a9c34 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -1,7 +1,7 @@
/*
* CAIF Interface registration.
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*
* Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont
@@ -301,10 +301,11 @@ static void dev_flowctrl(struct net_device *dev, int on)
}
void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
- struct cflayer *link_support, int head_room,
- struct cflayer **layer, int (**rcv_func)(
- struct sk_buff *, struct net_device *,
- struct packet_type *, struct net_device *))
+ struct cflayer *link_support, int head_room,
+ struct cflayer **layer,
+ int (**rcv_func)(struct sk_buff *, struct net_device *,
+ struct packet_type *,
+ struct net_device *))
{
struct caif_device_entry *caifd;
enum cfcnfg_phy_preference pref;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index ff2ff3ce6965..05a41c7ec304 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -197,8 +197,8 @@ static void cfsk_put(struct cflayer *layr)
/* Packet Control Callback function called from CAIF */
static void caif_ctrl_cb(struct cflayer *layr,
- enum caif_ctrlcmd flow,
- int phyid)
+ enum caif_ctrlcmd flow,
+ int phyid)
{
struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
switch (flow) {
@@ -274,7 +274,7 @@ static void caif_check_flow_release(struct sock *sk)
* changed locking, address handling and added MSG_TRUNC.
*/
static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t len, int flags)
+ struct msghdr *m, size_t len, int flags)
{
struct sock *sk = sock->sk;
@@ -348,8 +348,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
* changed locking calls, changed address handling.
*/
static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t size,
- int flags)
+ struct msghdr *msg, size_t size,
+ int flags)
{
struct sock *sk = sock->sk;
int copied = 0;
@@ -464,7 +464,7 @@ out:
* CAIF flow-on and sock_writable.
*/
static long caif_wait_for_flow_on(struct caifsock *cf_sk,
- int wait_writeable, long timeo, int *err)
+ int wait_writeable, long timeo, int *err)
{
struct sock *sk = &cf_sk->sk;
DEFINE_WAIT(wait);
@@ -518,7 +518,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+ struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -593,7 +593,7 @@ err:
* and other minor adaptations.
*/
static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+ struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -672,7 +672,7 @@ out_err:
}
static int setsockopt(struct socket *sock,
- int lvl, int opt, char __user *ov, unsigned int ol)
+ int lvl, int opt, char __user *ov, unsigned int ol)
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -934,7 +934,7 @@ static int caif_release(struct socket *sock)
/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
static unsigned int caif_poll(struct file *file,
- struct socket *sock, poll_table *wait)
+ struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
@@ -1024,7 +1024,7 @@ static void caif_sock_destructor(struct sock *sk)
}
static int caif_create(struct net *net, struct socket *sock, int protocol,
- int kern)
+ int kern)
{
struct sock *sk = NULL;
struct caifsock *cf_sk = NULL;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index ef8ebaa993cf..942e00a425fd 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -1,7 +1,7 @@
/*
* CAIF USB handler
* Copyright (C) ST-Ericsson AB 2011
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*
*/
@@ -75,7 +75,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt)
}
static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid)
+ int phyid)
{
if (layr->up && layr->up->ctrlcmd)
layr->up->ctrlcmd(layr->up, ctrl, layr->id);
@@ -121,7 +121,7 @@ static struct packet_type caif_usb_type __read_mostly = {
};
static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
- void *arg)
+ void *arg)
{
struct net_device *dev = arg;
struct caif_dev_common common;
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index f1dbddb95a6c..fa39fc298708 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -61,11 +61,11 @@ struct cfcnfg {
};
static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
- enum cfctrl_srv serv, u8 phyid,
- struct cflayer *adapt_layer);
+ enum cfctrl_srv serv, u8 phyid,
+ struct cflayer *adapt_layer);
static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id);
static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
- struct cflayer *adapt_layer);
+ struct cflayer *adapt_layer);
static void cfctrl_resp_func(void);
static void cfctrl_enum_resp(void);
@@ -131,7 +131,7 @@ static void cfctrl_resp_func(void)
}
static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg,
- u8 phyid)
+ u8 phyid)
{
struct cfcnfg_phyinfo *phy;
@@ -216,8 +216,8 @@ static const int protohead[CFCTRL_SRV_MASK] = {
static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
- struct caif_connect_request *s,
- struct cfctrl_link_param *l)
+ struct caif_connect_request *s,
+ struct cfctrl_link_param *l)
{
struct dev_info *dev_info;
enum cfcnfg_phy_preference pref;
@@ -301,8 +301,7 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,
struct cflayer *adap_layer, int *ifindex,
- int *proto_head,
- int *proto_tail)
+ int *proto_head, int *proto_tail)
{
struct cflayer *frml;
struct cfcnfg_phyinfo *phy;
@@ -364,7 +363,7 @@ unlock:
EXPORT_SYMBOL(caif_connect_client);
static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
- struct cflayer *adapt_layer)
+ struct cflayer *adapt_layer)
{
if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
adapt_layer->ctrlcmd(adapt_layer,
@@ -526,7 +525,7 @@ out_err:
EXPORT_SYMBOL(cfcnfg_add_phy_layer);
int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer,
- bool up)
+ bool up)
{
struct cfcnfg_phyinfo *phyinfo;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index a376ec1ac0a7..2bd4b58f4372 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -20,12 +20,12 @@
#ifdef CAIF_NO_LOOP
static int handle_loop(struct cfctrl *ctrl,
- int cmd, struct cfpkt *pkt){
+ int cmd, struct cfpkt *pkt){
return -1;
}
#else
static int handle_loop(struct cfctrl *ctrl,
- int cmd, struct cfpkt *pkt);
+ int cmd, struct cfpkt *pkt);
#endif
static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);
static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
@@ -72,7 +72,7 @@ void cfctrl_remove(struct cflayer *layer)
}
static bool param_eq(const struct cfctrl_link_param *p1,
- const struct cfctrl_link_param *p2)
+ const struct cfctrl_link_param *p2)
{
bool eq =
p1->linktype == p2->linktype &&
@@ -197,8 +197,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
}
int cfctrl_linkup_request(struct cflayer *layer,
- struct cfctrl_link_param *param,
- struct cflayer *user_layer)
+ struct cfctrl_link_param *param,
+ struct cflayer *user_layer)
{
struct cfctrl *cfctrl = container_obj(layer);
u32 tmp32;
@@ -301,7 +301,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
}
int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
- struct cflayer *client)
+ struct cflayer *client)
{
int ret;
struct cfpkt *pkt;
@@ -555,7 +555,7 @@ error:
}
static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid)
+ int phyid)
{
struct cfctrl *this = container_obj(layr);
switch (ctrl) {
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 2914659eb9b2..7aae0b56829e 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index a63f4a5f5aff..3bdddb32d55a 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 0a7df7ef062d..8bc7caa28e64 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -2,7 +2,7 @@
* CAIF Framing Layer.
*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -28,7 +28,7 @@ struct cffrml {
static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);
static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid);
+ int phyid);
static u32 cffrml_rcv_error;
static u32 cffrml_rcv_checsum_error;
@@ -167,7 +167,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
}
static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid)
+ int phyid)
{
if (layr->up && layr->up->ctrlcmd)
layr->up->ctrlcmd(layr->up, ctrl, layr->id);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 94b08612a4d8..8c5d6386319f 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -42,7 +42,7 @@ struct cfmuxl {
static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);
static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid);
+ int phyid);
static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);
struct cflayer *cfmuxl_create(void)
@@ -244,7 +244,7 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
}
static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid)
+ int phyid)
{
struct cfmuxl *muxl = container_obj(layr);
struct cflayer *layer;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 863dedd91bb6..6493351f39c6 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -266,8 +266,8 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt)
}
inline u16 cfpkt_iterate(struct cfpkt *pkt,
- u16 (*iter_func)(u16, void *, u16),
- u16 data)
+ u16 (*iter_func)(u16, void *, u16),
+ u16 data)
{
/*
* Don't care about the performance hit of linearizing,
@@ -307,8 +307,8 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
}
struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
- struct cfpkt *addpkt,
- u16 expectlen)
+ struct cfpkt *addpkt,
+ u16 expectlen)
{
struct sk_buff *dst = pkt_to_skb(dstpkt);
struct sk_buff *add = pkt_to_skb(addpkt);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 2b563ad04597..61d7617d9249 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -43,7 +43,7 @@ static void cfrfml_release(struct cflayer *layer)
}
struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
- int mtu_size)
+ int mtu_size)
{
int tmp;
struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
@@ -69,7 +69,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
}
static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
- struct cfpkt *pkt, int *err)
+ struct cfpkt *pkt, int *err)
{
struct cfpkt *tmppkt;
*err = -EPROTO;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 8e68b97f13ee..ce60f06d76de 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -29,7 +29,7 @@ struct cfserl {
static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid);
+ int phyid);
struct cflayer *cfserl_create(int instance, bool use_stx)
{
@@ -182,7 +182,7 @@ static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
}
static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid)
+ int phyid)
{
layr->up->ctrlcmd(layr->up, ctrl, phyid);
}
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index ba217e90765e..353f793d1b3b 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
@@ -25,7 +25,7 @@
#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid)
+ int phyid)
{
struct cfsrvl *service = container_obj(layr);
@@ -158,10 +158,9 @@ static void cfsrvl_release(struct cflayer *layer)
}
void cfsrvl_init(struct cfsrvl *service,
- u8 channel_id,
- struct dev_info *dev_info,
- bool supports_flowctrl
- )
+ u8 channel_id,
+ struct dev_info *dev_info,
+ bool supports_flowctrl)
{
caif_assert(offsetof(struct cfsrvl, layer) == 0);
service->open = false;
@@ -207,8 +206,8 @@ void caif_free_client(struct cflayer *adap_layer)
EXPORT_SYMBOL(caif_free_client);
void caif_client_register_refcnt(struct cflayer *adapt_layer,
- void (*hold)(struct cflayer *lyr),
- void (*put)(struct cflayer *lyr))
+ void (*hold)(struct cflayer *lyr),
+ void (*put)(struct cflayer *lyr))
{
struct cfsrvl *service;
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 86d2dadb4b73..1728fa4471cf 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 910ab0661f66..262224581efa 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index a8e2a2d758a5..b3b110e8a350 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author: Sjur Brendeland
* License terms: GNU General Public License (GPL) version 2
*/
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index e597733affb8..7344a8fa1bb0 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -1,7 +1,7 @@
/*
* Copyright (C) ST-Ericsson AB 2010
- * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com
- * Daniel Martensson / Daniel.Martensson@stericsson.com
+ * Authors: Sjur Brendeland
+ * Daniel Martensson
* License terms: GNU General Public License (GPL) version 2
*/
@@ -167,7 +167,7 @@ static void chnl_put(struct cflayer *lyr)
}
static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
- int phyid)
+ int phyid)
{
struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
pr_debug("NET flowctrl func called flow: %s\n",
@@ -443,7 +443,7 @@ nla_put_failure:
}
static void caif_netlink_parms(struct nlattr *data[],
- struct caif_connect_request *conn_req)
+ struct caif_connect_request *conn_req)
{
if (!data) {
pr_warn("no params data found\n");
@@ -488,7 +488,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
}
static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[])
+ struct nlattr *data[])
{
struct chnl_net *caifdev;
ASSERT_RTNL();
diff --git a/net/can/af_can.c b/net/can/af_can.c
index c48e5220bbac..c4e50852c9f4 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -525,7 +525,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
d = find_dev_rcv_lists(dev);
if (!d) {
- printk(KERN_ERR "BUG: receive list not found for "
+ pr_err("BUG: receive list not found for "
"dev %s, id %03X, mask %03X\n",
DNAME(dev), can_id, mask);
goto out;
@@ -546,16 +546,13 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
}
/*
- * Check for bugs in CAN protocol implementations:
- * If no matching list item was found, the list cursor variable next
- * will be NULL, while r will point to the last item of the list.
+ * Check for bugs in CAN protocol implementations using af_can.c:
+ * 'r' will be NULL if no matching list item was found for removal.
*/
if (!r) {
- printk(KERN_ERR "BUG: receive list entry not found for "
- "dev %s, id %03X, mask %03X\n",
- DNAME(dev), can_id, mask);
- r = NULL;
+ WARN(1, "BUG: receive list entry not found for dev %s, "
+ "id %03X, mask %03X\n", DNAME(dev), can_id, mask);
goto out;
}
@@ -749,8 +746,7 @@ int can_proto_register(const struct can_proto *cp)
int err = 0;
if (proto < 0 || proto >= CAN_NPROTO) {
- printk(KERN_ERR "can: protocol number %d out of range\n",
- proto);
+ pr_err("can: protocol number %d out of range\n", proto);
return -EINVAL;
}
@@ -761,8 +757,7 @@ int can_proto_register(const struct can_proto *cp)
mutex_lock(&proto_tab_lock);
if (proto_tab[proto]) {
- printk(KERN_ERR "can: protocol %d already registered\n",
- proto);
+ pr_err("can: protocol %d already registered\n", proto);
err = -EBUSY;
} else
RCU_INIT_POINTER(proto_tab[proto], cp);
@@ -816,11 +811,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
/* create new dev_rcv_lists for this device */
d = kzalloc(sizeof(*d), GFP_KERNEL);
- if (!d) {
- printk(KERN_ERR
- "can: allocation of receive list failed\n");
+ if (!d)
return NOTIFY_DONE;
- }
BUG_ON(dev->ml_priv);
dev->ml_priv = d;
@@ -838,8 +830,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
dev->ml_priv = NULL;
}
} else
- printk(KERN_ERR "can: notifier: receive list not "
- "found for dev %s\n", dev->name);
+ pr_err("can: notifier: receive list not found for dev "
+ "%s\n", dev->name);
spin_unlock(&can_rcvlists_lock);
@@ -927,7 +919,7 @@ static __exit void can_exit(void)
/* remove created dev_rcv_lists from still registered CAN devices */
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv){
+ if (dev->type == ARPHRD_CAN && dev->ml_priv) {
struct dev_rcv_lists *d = dev->ml_priv;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 5dcb20076f39..8f113e6ff327 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -226,7 +226,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
static int bcm_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, bcm_proc_show, PDE(inode)->data);
+ return single_open(file, bcm_proc_show, PDE_DATA(inode));
}
static const struct file_operations bcm_proc_fops = {
diff --git a/net/can/gw.c b/net/can/gw.c
index 117814a7e73c..3ee690e8c7d3 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
return 0;
}
-static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
- void *arg)
+static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct rtcanmsg *r;
struct cgw_job *gwj;
@@ -868,7 +867,7 @@ static void cgw_remove_all_jobs(void)
}
}
-static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct cgw_job *gwj = NULL;
struct hlist_node *nx;
diff --git a/net/can/proc.c b/net/can/proc.c
index 1ab8c888f102..b543470c8f8b 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -378,7 +378,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_proc_show, PDE(inode)->data);
+ return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode));
}
static const struct file_operations can_rcvlist_proc_fops = {
diff --git a/net/can/raw.c b/net/can/raw.c
index c1764e41ddaf..1085e65f848e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
if (err < 0)
goto free_skb;
- err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (err < 0)
- goto free_skb;
+
+ sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
skb->dev = dev;
skb->sk = sk;
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index e87ef435e11b..958d9856912c 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -11,5 +11,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
crypto.o armor.o \
auth_x.o \
ceph_fs.o ceph_strings.o ceph_hash.o \
- pagevec.o
+ pagevec.o snapshot.o
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index b4bf4ac090f1..6b923bcaa2a4 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -47,6 +47,7 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp
if (!ac)
goto out;
+ mutex_init(&ac->mutex);
ac->negotiating = true;
if (name)
ac->name = name;
@@ -73,10 +74,12 @@ void ceph_auth_destroy(struct ceph_auth_client *ac)
*/
void ceph_auth_reset(struct ceph_auth_client *ac)
{
+ mutex_lock(&ac->mutex);
dout("auth_reset %p\n", ac);
if (ac->ops && !ac->negotiating)
ac->ops->reset(ac);
ac->negotiating = true;
+ mutex_unlock(&ac->mutex);
}
int ceph_entity_name_encode(const char *name, void **p, void *end)
@@ -102,6 +105,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
int i, num;
int ret;
+ mutex_lock(&ac->mutex);
dout("auth_build_hello\n");
monhdr->have_version = 0;
monhdr->session_mon = cpu_to_le16(-1);
@@ -122,15 +126,19 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
ret = ceph_entity_name_encode(ac->name, &p, end);
if (ret < 0)
- return ret;
+ goto out;
ceph_decode_need(&p, end, sizeof(u64), bad);
ceph_encode_64(&p, ac->global_id);
ceph_encode_32(&lenp, p - lenp - sizeof(u32));
- return p - buf;
+ ret = p - buf;
+out:
+ mutex_unlock(&ac->mutex);
+ return ret;
bad:
- return -ERANGE;
+ ret = -ERANGE;
+ goto out;
}
static int ceph_build_auth_request(struct ceph_auth_client *ac,
@@ -151,11 +159,13 @@ static int ceph_build_auth_request(struct ceph_auth_client *ac,
if (ret < 0) {
pr_err("error %d building auth method %s request\n", ret,
ac->ops->name);
- return ret;
+ goto out;
}
dout(" built request %d bytes\n", ret);
ceph_encode_32(&p, ret);
- return p + ret - msg_buf;
+ ret = p + ret - msg_buf;
+out:
+ return ret;
}
/*
@@ -176,6 +186,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
int result_msg_len;
int ret = -EINVAL;
+ mutex_lock(&ac->mutex);
dout("handle_auth_reply %p %p\n", p, end);
ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
protocol = ceph_decode_32(&p);
@@ -227,33 +238,103 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ret = ac->ops->handle_reply(ac, result, payload, payload_end);
if (ret == -EAGAIN) {
- return ceph_build_auth_request(ac, reply_buf, reply_len);
+ ret = ceph_build_auth_request(ac, reply_buf, reply_len);
} else if (ret) {
pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
- return ret;
}
- return 0;
-bad:
- pr_err("failed to decode auth msg\n");
out:
+ mutex_unlock(&ac->mutex);
return ret;
+
+bad:
+ pr_err("failed to decode auth msg\n");
+ ret = -EINVAL;
+ goto out;
}
int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len)
{
+ int ret = 0;
+
+ /*
+ * Build the next outgoing auth message into msg_buf: a hello while no
+ * protocol has been selected, otherwise a method request if the auth
+ * method says one is due (0 is returned when nothing needs sending).
+ *
+ * ceph_auth_build_hello() takes ac->mutex itself and kernel mutexes
+ * are not recursive, so the hello path must run before we lock here;
+ * calling it with the mutex held would deadlock.
+ */
if (!ac->protocol)
return ceph_auth_build_hello(ac, msg_buf, msg_len);
- BUG_ON(!ac->ops);
- if (ac->ops->should_authenticate(ac))
- return ceph_build_auth_request(ac, msg_buf, msg_len);
- return 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops->should_authenticate(ac))
+ ret = ceph_build_auth_request(ac, msg_buf, msg_len);
+ mutex_unlock(&ac->mutex);
+ return ret;
}
int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
{
- if (!ac->ops)
- return 0;
- return ac->ops->is_authenticated(ac);
+ int ret = 0; /* returned unchanged when no auth method (ac->ops) is set */
+
+ mutex_lock(&ac->mutex); /* ac->ops is tested and called under ac->mutex */
+ if (ac->ops)
+ ret = ac->ops->is_authenticated(ac);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_is_authenticated);
+
+/*
+ * Call the auth method's create_authorizer hook for @peer_type while
+ * holding ac->mutex.  Returns the hook's result, or 0 when no method or
+ * no such hook is set.
+ */
+int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+				int peer_type,
+				struct ceph_auth_handshake *auth)
+{
+	const struct ceph_auth_client_ops *ops;
+	int err;
+
+	mutex_lock(&ac->mutex);
+	ops = ac->ops;
+	if (!ops || !ops->create_authorizer)
+		err = 0;
+	else
+		err = ops->create_authorizer(ac, peer_type, auth);
+	mutex_unlock(&ac->mutex);
+
+	return err;
+}
+EXPORT_SYMBOL(ceph_auth_create_authorizer);
+
+/*
+ * Call the auth method's destroy_authorizer hook on @a while holding
+ * ac->mutex; does nothing when no method or no such hook is set.
+ */
+void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+				  struct ceph_authorizer *a)
+{
+	const struct ceph_auth_client_ops *ops;
+
+	mutex_lock(&ac->mutex);
+	ops = ac->ops;
+	if (ops && ops->destroy_authorizer)
+		ops->destroy_authorizer(ac, a);
+	mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
+
+/*
+ * Call the auth method's update_authorizer hook for @peer_type while
+ * holding ac->mutex.  Returns the hook's result, or 0 when no method or
+ * no such hook is set.
+ */
+int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+				int peer_type,
+				struct ceph_auth_handshake *a)
+{
+	const struct ceph_auth_client_ops *ops;
+	int err;
+
+	mutex_lock(&ac->mutex);
+	ops = ac->ops;
+	if (!ops || !ops->update_authorizer)
+		err = 0;
+	else
+		err = ops->update_authorizer(ac, peer_type, a);
+	mutex_unlock(&ac->mutex);
+
+	return err;
+}
+EXPORT_SYMBOL(ceph_auth_update_authorizer);
+
+/*
+ * Call the auth method's verify_authorizer_reply hook on @a while
+ * holding ac->mutex.  Returns the hook's result, or 0 when no method or
+ * no such hook is set.
+ */
+int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+				      struct ceph_authorizer *a, size_t len)
+{
+	const struct ceph_auth_client_ops *ops;
+	int err;
+
+	mutex_lock(&ac->mutex);
+	ops = ac->ops;
+	if (!ops || !ops->verify_authorizer_reply)
+		err = 0;
+	else
+		err = ops->verify_authorizer_reply(ac, a, len);
+	mutex_unlock(&ac->mutex);
+
+	return err;
+}
+EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply);
+
+void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
+{ /*
+ * Call the auth method's invalidate_authorizer hook for peer_type while
+ * holding ac->mutex; does nothing when no method or no such hook is set.
+ */
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->invalidate_authorizer)
+ ac->ops->invalidate_authorizer(ac, peer_type);
+ mutex_unlock(&ac->mutex);
}
+EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 925ca583c09c..8c93fa8d81bc 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac)
return xi->starting;
}
+static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
+{ /* stub build_request op: leaves buf untouched and always reports 0 */
+ return 0;
+}
+
/*
* the generic auth code decode the global_id, and we carry no actual
* authenticate state, so nothing happens here.
@@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.destroy = destroy,
.is_authenticated = is_authenticated,
.should_authenticate = should_authenticate,
+ .build_request = build_request,
.handle_reply = handle_reply,
.create_authorizer = ceph_auth_none_create_authorizer,
.destroy_authorizer = ceph_auth_none_destroy_authorizer,
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index a16bf14eb027..96238ba95f2b 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -298,6 +298,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
return -ENOMEM;
}
au->service = th->service;
+ au->secret_id = th->secret_id;
msg_a = au->buf->vec.iov_base;
msg_a->struct_v = 1;
@@ -555,6 +556,26 @@ static int ceph_x_create_authorizer(
return 0;
}
+/*
+ * Rebuild an existing authorizer if the ticket handler for @peer_type
+ * now carries a newer secret_id than the one the authorizer was built
+ * with.  Returns 0 when the authorizer is already current, the ticket
+ * handler lookup error if that fails, or the rebuild result.
+ */
+static int ceph_x_update_authorizer(
+	struct ceph_auth_client *ac, int peer_type,
+	struct ceph_auth_handshake *auth)
+{
+	struct ceph_x_ticket_handler *handler;
+	struct ceph_x_authorizer *xau;
+
+	handler = get_ticket_handler(ac, peer_type);
+	if (IS_ERR(handler))
+		return PTR_ERR(handler);
+
+	xau = (struct ceph_x_authorizer *)auth->authorizer;
+	if (xau->secret_id >= handler->secret_id)
+		return 0;	/* nothing newer to pick up */
+
+	dout("ceph_x_update_authorizer service %u secret %llu < %llu\n",
+	     xau->service, xau->secret_id, handler->secret_id);
+	return ceph_x_build_authorizer(ac, handler, xau);
+}
+
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a, size_t len)
{
@@ -630,7 +651,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
- remove_ticket_handler(ac, th);
+ memset(&th->validity, 0, sizeof(th->validity));
}
@@ -641,6 +662,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.build_request = ceph_x_build_request,
.handle_reply = ceph_x_handle_reply,
.create_authorizer = ceph_x_create_authorizer,
+ .update_authorizer = ceph_x_update_authorizer,
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
.destroy_authorizer = ceph_x_destroy_authorizer,
.invalidate_authorizer = ceph_x_invalidate_authorizer,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index f459e93b774f..c5a058da7ac8 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -29,6 +29,7 @@ struct ceph_x_authorizer {
struct ceph_buffer *buf;
unsigned int service;
u64 nonce;
+ u64 secret_id;
char reply_buf[128]; /* big enough for encrypted blob */
};
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index e65e6e4be38b..34b11ee8124e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -606,11 +606,17 @@ static int __init init_ceph_lib(void)
if (ret < 0)
goto out_crypto;
+ ret = ceph_osdc_setup();
+ if (ret < 0)
+ goto out_msgr;
+
pr_info("loaded (mon/osd proto %d/%d)\n",
CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
return 0;
+out_msgr:
+ ceph_msgr_exit();
out_crypto:
ceph_crypto_shutdown();
out_debugfs:
@@ -622,6 +628,7 @@ out:
static void __exit exit_ceph_lib(void)
{
dout("exit_ceph_lib\n");
+ ceph_osdc_cleanup();
ceph_msgr_exit();
ceph_crypto_shutdown();
ceph_debugfs_cleanup();
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 00d051f4894e..83661cdc0766 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp)
mutex_lock(&osdc->request_mutex);
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
struct ceph_osd_request *req;
+ unsigned int i;
int opcode;
- int i;
req = rb_entry(p, struct ceph_osd_request, r_node);
@@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp)
seq_printf(s, "\t");
for (i = 0; i < req->r_num_ops; i++) {
- opcode = le16_to_cpu(req->r_request_ops[i].op);
+ opcode = req->r_ops[i].op;
seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
}
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2c0669fb54e3..eb0a46a49bd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -21,6 +21,9 @@
#include <linux/ceph/pagelist.h>
#include <linux/export.h>
+/*
+ * Given a pointer to a list element, yield the element that follows it
+ * on the list linked through @member.  @pos is parenthesized so that an
+ * expression argument (not just a plain identifier) expands correctly.
+ */
+#define list_entry_next(pos, member)					\
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+
/*
* Ceph uses the messenger to exchange ceph_msg messages with other
* hosts in the system. The messenger provides ordered and reliable
@@ -149,6 +152,11 @@ static bool con_flag_test_and_set(struct ceph_connection *con,
return test_and_set_bit(con_flag, &con->flags);
}
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache *ceph_msg_cache;
+static struct kmem_cache *ceph_msg_data_cache;
+
/* static tag bytes (protocol control messages) */
static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -223,6 +231,41 @@ static void encode_my_addr(struct ceph_messenger *msgr)
*/
static struct workqueue_struct *ceph_msgr_wq;
+/*
+ * Create the "ceph_msg" and "ceph_msg_data" slab caches.  Both cache
+ * pointers must be NULL on entry.  Returns 0 on success; on failure no
+ * cache is left allocated and -ENOMEM is returned.
+ */
+static int ceph_msgr_slab_init(void)
+{
+	BUG_ON(ceph_msg_cache);
+	ceph_msg_cache = kmem_cache_create("ceph_msg",
+					sizeof (struct ceph_msg),
+					__alignof__(struct ceph_msg), 0, NULL);
+	if (!ceph_msg_cache)
+		goto out;
+
+	BUG_ON(ceph_msg_data_cache);
+	ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
+					sizeof (struct ceph_msg_data),
+					__alignof__(struct ceph_msg_data),
+					0, NULL);
+	if (!ceph_msg_data_cache)
+		goto out_msg_cache;
+
+	return 0;
+
+out_msg_cache:
+	/* second cache failed: undo the first */
+	kmem_cache_destroy(ceph_msg_cache);
+	ceph_msg_cache = NULL;
+out:
+	return -ENOMEM;
+}
+
+static void ceph_msgr_slab_exit(void)
+{ /*
+ * Destroy both messenger slab caches, data cache first, and clear the
+ * pointers.  Each cache is asserted to exist before it is destroyed.
+ */
+ BUG_ON(!ceph_msg_data_cache);
+ kmem_cache_destroy(ceph_msg_data_cache);
+ ceph_msg_data_cache = NULL;
+
+ BUG_ON(!ceph_msg_cache);
+ kmem_cache_destroy(ceph_msg_cache);
+ ceph_msg_cache = NULL;
+}
+
static void _ceph_msgr_exit(void)
{
if (ceph_msgr_wq) {
@@ -230,6 +273,8 @@ static void _ceph_msgr_exit(void)
ceph_msgr_wq = NULL;
}
+ ceph_msgr_slab_exit();
+
BUG_ON(zero_page == NULL);
kunmap(zero_page);
page_cache_release(zero_page);
@@ -242,6 +287,9 @@ int ceph_msgr_init(void)
zero_page = ZERO_PAGE(0);
page_cache_get(zero_page);
+ if (ceph_msgr_slab_init())
+ return -ENOMEM;
+
ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
if (ceph_msgr_wq)
return 0;
@@ -471,6 +519,22 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
return r;
}
+/*
+ * Receive up to @length bytes from @sock into @page starting at
+ * @page_offset.  The range must lie within the page.  The page is mapped
+ * only for the duration of the call; the ceph_tcp_recvmsg() result is
+ * returned unchanged.
+ */
+static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
+			     int page_offset, size_t length)
+{
+	char *mapped;
+	int nread;
+
+	BUG_ON(page_offset + length > PAGE_SIZE);
+
+	mapped = kmap(page);
+	BUG_ON(!mapped);
+	nread = ceph_tcp_recvmsg(sock, mapped + page_offset, length);
+	kunmap(page);
+
+	return nread;
+}
+
/*
* write something. @more is true if caller will be sending more data
* shortly.
@@ -493,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
}
static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
- int offset, size_t size, int more)
+ int offset, size_t size, bool more)
{
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
int ret;
@@ -697,50 +761,397 @@ static void con_out_kvec_add(struct ceph_connection *con,
}
#ifdef CONFIG_BLOCK
-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+
+/*
+ * For a bio data item, a piece is whatever remains of the next
+ * entry in the current bio iovec, or the first entry in the next
+ * bio in the list.
+ *
+ * Start the cursor at the first vector entry of the item's bio.  The
+ * item contributes at most min(length, bio_length) bytes, and whether
+ * the first entry is also the last piece is judged against that limit.
+ */
+static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
{
- if (!bio) {
- *iter = NULL;
- *seg = 0;
- return;
+ struct ceph_msg_data *data = cursor->data;
+ struct bio *bio;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+ bio = data->bio;
+ BUG_ON(!bio);
+ BUG_ON(!bio->bi_vcnt);
+
+ cursor->resid = min(length, data->bio_length);
+ cursor->bio = bio;
+ cursor->vector_index = 0;
+ cursor->vector_offset = 0;
+ /* use resid, not length: length may also cover later data items */
+ cursor->last_piece = cursor->resid <= bio->bi_io_vec[0].bv_len;
+}
+
+static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset,
+ size_t *length)
+{ /*
+ * Describe the piece at the cursor's current bio vector entry: returns
+ * its page and stores the offset within that page and the piece length
+ * through page_offset and length.  The cursor itself is not modified.
+ */
+ struct ceph_msg_data *data = cursor->data;
+ struct bio *bio;
+ struct bio_vec *bio_vec;
+ unsigned int index;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+ bio = cursor->bio;
+ BUG_ON(!bio);
+
+ index = cursor->vector_index;
+ BUG_ON(index >= (unsigned int) bio->bi_vcnt);
+
+ bio_vec = &bio->bi_io_vec[index];
+ BUG_ON(cursor->vector_offset >= bio_vec->bv_len);
+ *page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset);
+ BUG_ON(*page_offset >= PAGE_SIZE);
+ if (cursor->last_piece) /* final piece: exactly what remains of the item */
+ *length = cursor->resid;
+ else
+ *length = (size_t) (bio_vec->bv_len - cursor->vector_offset);
+ BUG_ON(*length > cursor->resid);
+ BUG_ON(*page_offset + *length > PAGE_SIZE);
+
+ return bio_vec->bv_page;
+}
+
+static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{ /*
+ * Consume bytes from the current bio vector entry.  Returns false while
+ * that entry still has unconsumed data, true once the cursor has moved
+ * to the next entry (following bi_next when the current bio's vector
+ * is exhausted).
+ */
+ struct bio *bio;
+ struct bio_vec *bio_vec;
+ unsigned int index;
+
+ BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+
+ bio = cursor->bio;
+ BUG_ON(!bio);
+
+ index = cursor->vector_index;
+ BUG_ON(index >= (unsigned int) bio->bi_vcnt);
+ bio_vec = &bio->bi_io_vec[index];
+
+ /* Advance the cursor offset */
+
+ BUG_ON(cursor->resid < bytes);
+ cursor->resid -= bytes;
+ cursor->vector_offset += bytes;
+ if (cursor->vector_offset < bio_vec->bv_len)
+ return false; /* more bytes to process in this segment */
+ BUG_ON(cursor->vector_offset != bio_vec->bv_len);
+
+ /* Move on to the next segment, and possibly the next bio */
+
+ if (++index == (unsigned int) bio->bi_vcnt) {
+ bio = bio->bi_next;
+ index = 0;
+ }
+ cursor->bio = bio;
+ cursor->vector_index = index;
+ cursor->vector_offset = 0;
+
+ /* bio may be NULL here only if the piece just consumed was the last one */
+ if (!cursor->last_piece) {
+ BUG_ON(!cursor->resid);
+ BUG_ON(!bio);
+ /* A short read is OK, so use <= rather than == */
+ if (cursor->resid <= bio->bi_io_vec[index].bv_len)
+ cursor->last_piece = true;
+ }
+
+ return true;
}
+#endif /* CONFIG_BLOCK */
-static void iter_bio_next(struct bio **bio_iter, int *seg)
+/*
+ * For a page array, a piece comes from the first page in the array
+ * that has not already been fully consumed.
+ *
+ * Start the cursor at the first page, at the offset given by the item's
+ * alignment.  The item contributes at most min(length, data->length)
+ * bytes, and whether the first page also holds the last piece is judged
+ * against that limit.
+ */
+static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
{
- if (*bio_iter == NULL)
- return;
+ struct ceph_msg_data *data = cursor->data;
+ int page_count;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
- BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+ BUG_ON(!data->pages);
+ BUG_ON(!data->length);
- (*seg)++;
- if (*seg == (*bio_iter)->bi_vcnt)
- init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+ cursor->resid = min(length, data->length);
+ page_count = calc_pages_for(data->alignment, (u64)data->length);
+ cursor->page_offset = data->alignment & ~PAGE_MASK;
+ cursor->page_index = 0;
+ BUG_ON(page_count > (int)USHRT_MAX);
+ cursor->page_count = (unsigned short)page_count;
+ BUG_ON(length > SIZE_MAX - cursor->page_offset);
+ /* use resid, not length: length may also cover later data items */
+ cursor->last_piece = (size_t)cursor->page_offset + cursor->resid <= PAGE_SIZE;
}
-#endif
-static void prepare_write_message_data(struct ceph_connection *con)
+static struct page *
+ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
{
- struct ceph_msg *msg = con->out_msg;
+ struct ceph_msg_data *data = cursor->data; /* page-array item the cursor is walking */
- BUG_ON(!msg);
- BUG_ON(!msg->hdr.data_len);
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(cursor->page_index >= cursor->page_count);
+ BUG_ON(cursor->page_offset >= PAGE_SIZE);
+
+ *page_offset = cursor->page_offset;
+ if (cursor->last_piece)
+ *length = cursor->resid; /* final piece: only what is left of the item */
+ else
+ *length = PAGE_SIZE - *page_offset; /* otherwise run to the end of this page */
+
+ return data->pages[cursor->page_index];
+}
+
+static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{ /*
+ * Consume bytes from the current page of a page-array item.  Returns
+ * false while the current page still has unconsumed data (or bytes is
+ * 0), true once the cursor has stepped to the next page index.
+ */
+ BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
+
+ /* Advance the cursor page offset */
+
+ cursor->resid -= bytes;
+ cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
+ if (!bytes || cursor->page_offset)
+ return false; /* more bytes to process in the current page */
+
+ /* Move on to the next page; offset is already at 0 */
+
+ BUG_ON(cursor->page_index >= cursor->page_count);
+ cursor->page_index++;
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+ return true;
+}
+
+/*
+ * For a pagelist, a piece is whatever remains to be consumed in the
+ * first page in the list, or the front of the next page.
+ *
+ * Start the cursor at offset 0 of the first page on the list, limited to
+ * min(length, pagelist->length) bytes.  When length is 0 the cursor is
+ * left untouched.
+ */
+static void
+ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+ struct page *page;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ if (!length)
+ return; /* pagelist can be assigned but empty */
+
+ BUG_ON(list_empty(&pagelist->head));
+ page = list_first_entry(&pagelist->head, struct page, lru);
+
+ cursor->resid = min(length, pagelist->length);
+ cursor->page = page;
+ cursor->offset = 0;
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+}
+
+static struct page *
+ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{ /*
+ * Describe the piece at the cursor's current pagelist page: returns the
+ * page and stores the in-page offset and piece length through
+ * page_offset and length.  The cursor itself is not modified.
+ */
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
- /* initialize page iterator */
- con->out_msg_pos.page = 0;
- if (msg->pages)
- con->out_msg_pos.page_pos = msg->page_alignment;
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ BUG_ON(!cursor->page);
+ BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+
+ /* offset of first page in pagelist is always 0 */
+ *page_offset = cursor->offset & ~PAGE_MASK;
+ if (cursor->last_piece)
+ *length = cursor->resid; /* final piece: only what is left of the item */
else
- con->out_msg_pos.page_pos = 0;
+ *length = PAGE_SIZE - *page_offset; /* otherwise run to the end of this page */
+
+ return cursor->page;
+}
+
+/*
+ * Consume @bytes from the current page of a pagelist data item.
+ *
+ * Returns false while the current page still has unconsumed data (or
+ * @bytes is 0), and also when the item has just been fully consumed;
+ * returns true once the cursor has moved to the next page on the list.
+ */
+static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
+						size_t bytes)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct ceph_pagelist *pagelist;
+
+	BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+	pagelist = data->pagelist;
+	BUG_ON(!pagelist);
+
+	BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+	BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
+
+	/* Advance the cursor offset */
+
+	cursor->resid -= bytes;
+	cursor->offset += bytes;
+	/* offset of first page in pagelist is always 0 */
+	if (!bytes || cursor->offset & ~PAGE_MASK)
+		return false;	/* more bytes to process in the current page */
+
+	/*
+	 * An item that ends exactly on a page boundary has no next page to
+	 * step to; stop here instead of tripping the list_is_last() check.
+	 */
+	if (!cursor->resid)
+		return false;	/* no more data */
+
+	/* Move on to the next page */
+
+	BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
+	cursor->page = list_entry_next(cursor->page, lru);
+	cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+	return true;
+}
+
+/*
+ * Message data is handled (sent or received) in pieces, where each
+ * piece resides on a single page. The network layer might not
+ * consume an entire piece at once. A data item's cursor keeps
+ * track of which piece is next to process and how much remains to
+ * be processed in that piece. It also tracks whether the current
+ * piece is the last one in the data item.
+ *
+ * This helper sets up the type-specific cursor state for cursor->data,
+ * passing cursor->total_resid as the length, and then flags the first
+ * piece as needing a CRC.  Unknown or NONE item types are ignored.
+ */
+static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
+{
+ size_t length = cursor->total_resid;
+
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ ceph_msg_data_pagelist_cursor_init(cursor, length);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ ceph_msg_data_pages_cursor_init(cursor, length);
+ break;
#ifdef CONFIG_BLOCK
- if (msg->bio)
- init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
-#endif
- con->out_msg_pos.data_pos = 0;
- con->out_msg_pos.did_page_crc = false;
- con->out_more = 1; /* data + footer will follow */
+ case CEPH_MSG_DATA_BIO:
+ ceph_msg_data_bio_cursor_init(cursor, length);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ /* BUG(); */
+ break;
+ }
+ cursor->need_crc = true;
+}
+
+static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
+{ /*
+ * Point the message's cursor at the first item on msg->data and set it
+ * up to process length bytes in total.  length must be non-zero and no
+ * larger than msg->data_length, and the data list must not be empty.
+ */
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data *data;
+
+ BUG_ON(!length);
+ BUG_ON(length > msg->data_length);
+ BUG_ON(list_empty(&msg->data));
+
+ cursor->data_head = &msg->data;
+ cursor->total_resid = length;
+ data = list_first_entry(&msg->data, struct ceph_msg_data, links);
+ cursor->data = data;
+
+ __ceph_msg_data_cursor_init(cursor);
+}
+
+/*
+ * Return the page containing the next piece to process for a given
+ * data item, and supply the page offset and length of that piece.
+ * Indicate whether this is the last piece in this data item.
+ *
+ * last_piece may be NULL when the caller does not need that indication.
+ * The piece must be non-empty and lie within a single page; an item
+ * type with no handler yields no page, which is treated as a bug.
+ */
+static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length,
+ bool *last_piece)
+{
+ struct page *page;
+
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ page = ceph_msg_data_pages_next(cursor, page_offset, length);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ page = ceph_msg_data_bio_next(cursor, page_offset, length);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ page = NULL;
+ break;
+ }
+ BUG_ON(!page);
+ BUG_ON(*page_offset + *length > PAGE_SIZE);
+ BUG_ON(!*length);
+ if (last_piece)
+ *last_piece = cursor->last_piece;
+
+ return page;
+}
+
+/*
+ * Returns true if the result moves the cursor on to the next piece
+ * of the data item.
+ *
+ * Consumes bytes (at most cursor->resid) from the current data item and
+ * from the message total.  When the item is used up but the message
+ * still has data left, the cursor switches to the next item on the list
+ * and is re-initialized for it, which also counts as a new piece.  The
+ * result is recorded in cursor->need_crc as well.
+ */
+static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ bool new_piece;
+
+ BUG_ON(bytes > cursor->resid);
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ new_piece = ceph_msg_data_pages_advance(cursor, bytes);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ new_piece = ceph_msg_data_bio_advance(cursor, bytes);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ BUG();
+ break;
+ }
+ cursor->total_resid -= bytes;
+
+ if (!cursor->resid && cursor->total_resid) {
+ WARN_ON(!cursor->last_piece);
+ BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
+ cursor->data = list_entry_next(cursor->data, links);
+ __ceph_msg_data_cursor_init(cursor);
+ new_piece = true;
+ }
+ cursor->need_crc = new_piece;
+
+ return new_piece;
+}
+
+static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
+{ /*
+ * Get msg ready for its data portion to be processed: data_len bytes,
+ * which must be non-zero, starting from the first data item.
+ */
+ BUG_ON(!msg);
+ BUG_ON(!data_len);
+
+ /* Initialize data cursor */
+
+ ceph_msg_data_cursor_init(msg, (size_t)data_len);
}
/*
@@ -803,16 +1214,12 @@ static void prepare_write_message(struct ceph_connection *con)
m->hdr.seq = cpu_to_le64(++con->out_seq);
m->needs_out_seq = false;
}
-#ifdef CONFIG_BLOCK
- else
- m->bio_iter = NULL;
-#endif
+ WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
- dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
+ dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
- le32_to_cpu(m->hdr.data_len),
- m->nr_pages);
+ m->data_length);
BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
/* tag + hdr + front + middle */
@@ -843,11 +1250,13 @@ static void prepare_write_message(struct ceph_connection *con)
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
- if (m->hdr.data_len)
- prepare_write_message_data(con);
- else
+ if (m->data_length) {
+ prepare_message_data(con->out_msg, m->data_length);
+ con->out_more = 1; /* data + footer will follow */
+ } else {
/* no, queue up footer too and be done */
prepare_write_message_footer(con);
+ }
con_flag_set(con, CON_FLAG_WRITE_PENDING);
}
@@ -874,6 +1283,24 @@ static void prepare_write_ack(struct ceph_connection *con)
}
/*
+ * Prepare to share the seq during handshake: record everything received
+ * so far (in_seq) as acked, reset the outgoing kvec and queue that
+ * sequence number, little-endian, then flag the connection as having a
+ * write pending.
+ */
+static void prepare_write_seq(struct ceph_connection *con)
+{
+ dout("prepare_write_seq %p %llu -> %llu\n", con,
+ con->in_seq_acked, con->in_seq);
+ con->in_seq_acked = con->in_seq;
+
+ con_out_kvec_reset(con);
+
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
* Prepare to write keepalive byte.
*/
static void prepare_write_keepalive(struct ceph_connection *con)
@@ -1022,35 +1449,19 @@ out:
return ret; /* done! */
}
-static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
- size_t len, size_t sent, bool in_trail)
+static u32 ceph_crc32c_page(u32 crc, struct page *page,
+ unsigned int page_offset,
+ unsigned int length)
{
- struct ceph_msg *msg = con->out_msg;
+ char *kaddr; /* kernel mapping of page, valid only between kmap() and kunmap() */
- BUG_ON(!msg);
- BUG_ON(!sent);
-
- con->out_msg_pos.data_pos += sent;
- con->out_msg_pos.page_pos += sent;
- if (sent < len)
- return;
+ kaddr = kmap(page);
+ BUG_ON(kaddr == NULL);
+ crc = crc32c(crc, kaddr + page_offset, length); /* fold length bytes at page_offset into the running crc */
+ kunmap(page);
- BUG_ON(sent != len);
- con->out_msg_pos.page_pos = 0;
- con->out_msg_pos.page++;
- con->out_msg_pos.did_page_crc = false;
- if (in_trail)
- list_move_tail(&page->lru,
- &msg->trail->head);
- else if (msg->pagelist)
- list_move_tail(&page->lru,
- &msg->pagelist->head);
-#ifdef CONFIG_BLOCK
- else if (msg->bio)
- iter_bio_next(&msg->bio_iter, &msg->bio_seg);
-#endif
+ return crc; /* updated crc value */
}
-
/*
* Write as much message data payload as we can. If we finish, queue
* up the footer.
@@ -1058,21 +1469,17 @@ static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
* 0 -> socket full, but more to do
* <0 -> error
*/
-static int write_partial_msg_pages(struct ceph_connection *con)
+static int write_partial_message_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->out_msg;
- unsigned int data_len = le32_to_cpu(msg->hdr.data_len);
- size_t len;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
bool do_datacrc = !con->msgr->nocrc;
- int ret;
- int total_max_write;
- bool in_trail = false;
- const size_t trail_len = (msg->trail ? msg->trail->length : 0);
- const size_t trail_off = data_len - trail_len;
+ u32 crc;
- dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
- con, msg, con->out_msg_pos.page, msg->nr_pages,
- con->out_msg_pos.page_pos);
+ dout("%s %p msg %p\n", __func__, con, msg);
+
+ if (list_empty(&msg->data))
+ return -EINVAL;
/*
* Iterate through each page that contains data to be
@@ -1082,72 +1489,41 @@ static int write_partial_msg_pages(struct ceph_connection *con)
* need to map the page. If we have no pages, they have
* been revoked, so use the zero page.
*/
- while (data_len > con->out_msg_pos.data_pos) {
- struct page *page = NULL;
- int max_write = PAGE_SIZE;
- int bio_offset = 0;
-
- in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
- if (!in_trail)
- total_max_write = trail_off - con->out_msg_pos.data_pos;
-
- if (in_trail) {
- total_max_write = data_len - con->out_msg_pos.data_pos;
-
- page = list_first_entry(&msg->trail->head,
- struct page, lru);
- } else if (msg->pages) {
- page = msg->pages[con->out_msg_pos.page];
- } else if (msg->pagelist) {
- page = list_first_entry(&msg->pagelist->head,
- struct page, lru);
-#ifdef CONFIG_BLOCK
- } else if (msg->bio) {
- struct bio_vec *bv;
+ crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
+ while (cursor->resid) {
+ struct page *page;
+ size_t page_offset;
+ size_t length;
+ bool last_piece;
+ int ret;
- bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg);
- page = bv->bv_page;
- bio_offset = bv->bv_offset;
- max_write = bv->bv_len;
-#endif
- } else {
- page = zero_page;
- }
- len = min_t(int, max_write - con->out_msg_pos.page_pos,
- total_max_write);
-
- if (do_datacrc && !con->out_msg_pos.did_page_crc) {
- void *base;
- u32 crc = le32_to_cpu(msg->footer.data_crc);
- char *kaddr;
-
- kaddr = kmap(page);
- BUG_ON(kaddr == NULL);
- base = kaddr + con->out_msg_pos.page_pos + bio_offset;
- crc = crc32c(crc, base, len);
- kunmap(page);
- msg->footer.data_crc = cpu_to_le32(crc);
- con->out_msg_pos.did_page_crc = true;
- }
- ret = ceph_tcp_sendpage(con->sock, page,
- con->out_msg_pos.page_pos + bio_offset,
- len, 1);
- if (ret <= 0)
- goto out;
+ page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+ &last_piece);
+ /*
+ * The final argument is "more data will follow shortly", which
+ * holds for every piece except the last one of the data item.
+ */
+ ret = ceph_tcp_sendpage(con->sock, page, page_offset,
+ length, !last_piece);
+ if (ret <= 0) {
+ /* save the running crc so a later call can resume from it */
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
- out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
+ return ret;
+ }
+ /* crc covers the whole piece, once, on the first send of it */
+ if (do_datacrc && cursor->need_crc)
+ crc = ceph_crc32c_page(crc, page, page_offset, length);
+ (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
}
- dout("write_partial_msg_pages %p msg %p done\n", con, msg);
+ dout("%s %p msg %p done\n", __func__, con, msg);
/* prepare and queue up footer, too */
- if (!do_datacrc)
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
+ else
msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
con_out_kvec_reset(con);
prepare_write_message_footer(con);
- ret = 1;
-out:
- return ret;
+
+ return 1; /* must return > 0 to indicate success */
}
/*
@@ -1160,7 +1536,7 @@ static int write_partial_skip(struct ceph_connection *con)
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
- ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1);
+ ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
if (ret <= 0)
goto out;
con->out_skip -= ret;
@@ -1191,6 +1567,13 @@ static void prepare_read_ack(struct ceph_connection *con)
con->in_base_pos = 0;
}
+/*
+ * Arrange for the next read on @con to be treated as a
+ * CEPH_MSGR_TAG_SEQ payload, starting from position 0.
+ */
+static void prepare_read_seq(struct ceph_connection *con)
+{
+	dout("prepare_read_seq %p\n", con);
+
+	con->in_tag = CEPH_MSGR_TAG_SEQ;
+	con->in_base_pos = 0;
+}
+
static void prepare_read_tag(struct ceph_connection *con)
{
dout("prepare_read_tag %p\n", con);
@@ -1597,7 +1980,6 @@ static int process_connect(struct ceph_connection *con)
con->error_msg = "connect authorization failure";
return -1;
}
- con->auth_retry = 1;
con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
@@ -1668,6 +2050,7 @@ static int process_connect(struct ceph_connection *con)
prepare_read_connect(con);
break;
+ case CEPH_MSGR_TAG_SEQ:
case CEPH_MSGR_TAG_READY:
if (req_feat & ~server_feat) {
pr_err("%s%lld %s protocol feature mismatch,"
@@ -1682,7 +2065,7 @@ static int process_connect(struct ceph_connection *con)
WARN_ON(con->state != CON_STATE_NEGOTIATING);
con->state = CON_STATE_OPEN;
-
+ con->auth_retry = 0; /* we authenticated; clear flag */
con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
con->connect_seq++;
con->peer_features = server_feat;
@@ -1698,7 +2081,12 @@ static int process_connect(struct ceph_connection *con)
con->delay = 0; /* reset backoff memory */
- prepare_read_tag(con);
+ if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
+ prepare_write_seq(con);
+ prepare_read_seq(con);
+ } else {
+ prepare_read_tag(con);
+ }
break;
case CEPH_MSGR_TAG_WAIT:
@@ -1732,7 +2120,6 @@ static int read_partial_ack(struct ceph_connection *con)
return read_partial(con, end, size, &con->in_temp_ack);
}
-
/*
* We can finally discard anything that's been acked.
*/
@@ -1757,8 +2144,6 @@ static void process_ack(struct ceph_connection *con)
}
-
-
static int read_partial_message_section(struct ceph_connection *con,
struct kvec *section,
unsigned int sec_len, u32 *crc)
@@ -1782,77 +2167,49 @@ static int read_partial_message_section(struct ceph_connection *con,
return 1;
}
-static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
-
-static int read_partial_message_pages(struct ceph_connection *con,
- struct page **pages,
- unsigned int data_len, bool do_datacrc)
+static int read_partial_msg_data(struct ceph_connection *con)
{
- void *p;
+ struct ceph_msg *msg = con->in_msg;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ const bool do_datacrc = !con->msgr->nocrc;
+ struct page *page;
+ size_t page_offset;
+ size_t length;
+ u32 crc = 0;
int ret;
- int left;
- left = min((int)(data_len - con->in_msg_pos.data_pos),
- (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
- /* (page) data */
- BUG_ON(pages == NULL);
- p = kmap(pages[con->in_msg_pos.page]);
- ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
- left);
- if (ret > 0 && do_datacrc)
- con->in_data_crc =
- crc32c(con->in_data_crc,
- p + con->in_msg_pos.page_pos, ret);
- kunmap(pages[con->in_msg_pos.page]);
- if (ret <= 0)
- return ret;
- con->in_msg_pos.data_pos += ret;
- con->in_msg_pos.page_pos += ret;
- if (con->in_msg_pos.page_pos == PAGE_SIZE) {
- con->in_msg_pos.page_pos = 0;
- con->in_msg_pos.page++;
- }
-
- return ret;
-}
-
-#ifdef CONFIG_BLOCK
-static int read_partial_message_bio(struct ceph_connection *con,
- struct bio **bio_iter, int *bio_seg,
- unsigned int data_len, bool do_datacrc)
-{
- struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
- void *p;
- int ret, left;
+ BUG_ON(!msg);
+ if (list_empty(&msg->data))
+ return -EIO;
- left = min((int)(data_len - con->in_msg_pos.data_pos),
- (int)(bv->bv_len - con->in_msg_pos.page_pos));
+ if (do_datacrc)
+ crc = con->in_data_crc;
+ while (cursor->resid) {
+ page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+ NULL);
+ ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
+ if (ret <= 0) {
+ if (do_datacrc)
+ con->in_data_crc = crc;
- p = kmap(bv->bv_page) + bv->bv_offset;
+ return ret;
+ }
- ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
- left);
- if (ret > 0 && do_datacrc)
- con->in_data_crc =
- crc32c(con->in_data_crc,
- p + con->in_msg_pos.page_pos, ret);
- kunmap(bv->bv_page);
- if (ret <= 0)
- return ret;
- con->in_msg_pos.data_pos += ret;
- con->in_msg_pos.page_pos += ret;
- if (con->in_msg_pos.page_pos == bv->bv_len) {
- con->in_msg_pos.page_pos = 0;
- iter_bio_next(bio_iter, bio_seg);
+ if (do_datacrc)
+ crc = ceph_crc32c_page(crc, page, page_offset, ret);
+ (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
}
+ if (do_datacrc)
+ con->in_data_crc = crc;
- return ret;
+ return 1; /* must return > 0 to indicate success */
}
-#endif
/*
* read (part of) a message.
*/
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
+
static int read_partial_message(struct ceph_connection *con)
{
struct ceph_msg *m = con->in_msg;
@@ -1885,7 +2242,7 @@ static int read_partial_message(struct ceph_connection *con)
if (front_len > CEPH_MSG_MAX_FRONT_LEN)
return -EIO;
middle_len = le32_to_cpu(con->in_hdr.middle_len);
- if (middle_len > CEPH_MSG_MAX_DATA_LEN)
+ if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
return -EIO;
data_len = le32_to_cpu(con->in_hdr.data_len);
if (data_len > CEPH_MSG_MAX_DATA_LEN)
@@ -1914,14 +2271,22 @@ static int read_partial_message(struct ceph_connection *con)
int skip = 0;
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
- con->in_hdr.front_len, con->in_hdr.data_len);
+ front_len, data_len);
ret = ceph_con_in_msg_alloc(con, &skip);
if (ret < 0)
return ret;
+
+ BUG_ON(!con->in_msg ^ skip);
+ if (con->in_msg && data_len > con->in_msg->data_length) {
+ pr_warning("%s skipping long message (%u > %zd)\n",
+ __func__, data_len, con->in_msg->data_length);
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+ skip = 1;
+ }
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
- BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1936,17 +2301,10 @@ static int read_partial_message(struct ceph_connection *con)
if (m->middle)
m->middle->vec.iov_len = 0;
- con->in_msg_pos.page = 0;
- if (m->pages)
- con->in_msg_pos.page_pos = m->page_alignment;
- else
- con->in_msg_pos.page_pos = 0;
- con->in_msg_pos.data_pos = 0;
+ /* prepare for data payload, if any */
-#ifdef CONFIG_BLOCK
- if (m->bio)
- init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
-#endif
+ if (data_len)
+ prepare_message_data(con->in_msg, data_len);
}
/* front */
@@ -1965,24 +2323,10 @@ static int read_partial_message(struct ceph_connection *con)
}
/* (page) data */
- while (con->in_msg_pos.data_pos < data_len) {
- if (m->pages) {
- ret = read_partial_message_pages(con, m->pages,
- data_len, do_datacrc);
- if (ret <= 0)
- return ret;
-#ifdef CONFIG_BLOCK
- } else if (m->bio) {
- BUG_ON(!m->bio_iter);
- ret = read_partial_message_bio(con,
- &m->bio_iter, &m->bio_seg,
- data_len, do_datacrc);
- if (ret <= 0)
- return ret;
-#endif
- } else {
- BUG_ON(1);
- }
+ if (data_len) {
+ ret = read_partial_msg_data(con);
+ if (ret <= 0)
+ return ret;
}
/* footer */
@@ -2108,13 +2452,13 @@ more_kvec:
goto do_next;
}
- ret = write_partial_msg_pages(con);
+ ret = write_partial_message_data(con);
if (ret == 1)
goto more_kvec; /* we need to send the footer, too! */
if (ret == 0)
goto out;
if (ret < 0) {
- dout("try_write write_partial_msg_pages err %d\n",
+ dout("try_write write_partial_message_data err %d\n",
ret);
goto out;
}
@@ -2266,7 +2610,12 @@ more:
prepare_read_tag(con);
goto more;
}
- if (con->in_tag == CEPH_MSGR_TAG_ACK) {
+ if (con->in_tag == CEPH_MSGR_TAG_ACK ||
+ con->in_tag == CEPH_MSGR_TAG_SEQ) {
+ /*
+ * the final handshake seq exchange is semantically
+ * equivalent to an ACK
+ */
ret = read_partial_ack(con);
if (ret <= 0)
goto out;
@@ -2672,6 +3021,88 @@ void ceph_con_keepalive(struct ceph_connection *con)
}
EXPORT_SYMBOL(ceph_con_keepalive);
+/*
+ * Allocate a zeroed ceph_msg_data record of the given type from
+ * ceph_msg_data_cache and initialize its list linkage.
+ *
+ * Returns NULL (after a warning) if the type is not valid, and NULL
+ * if the allocation fails.
+ */
+static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+{
+	struct ceph_msg_data *data;
+
+	if (WARN_ON(!ceph_msg_data_type_valid(type)))
+		return NULL;
+
+	data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
+	if (!data)
+		return NULL;	/* must not touch ->links of a failed allocation */
+
+	data->type = type;
+	INIT_LIST_HEAD(&data->links);
+
+	return data;
+}
+
+static void ceph_msg_data_destroy(struct ceph_msg_data *data)
+{
+ if (!data)
+ return;
+
+ WARN_ON(!list_empty(&data->links));
+ if (data->type == CEPH_MSG_DATA_PAGELIST) {
+ ceph_pagelist_release(data->pagelist);
+ kfree(data->pagelist);
+ }
+ kmem_cache_free(ceph_msg_data_cache, data);
+}
+
+void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+ size_t length, size_t alignment)
+{
+ struct ceph_msg_data *data;
+
+ BUG_ON(!pages);
+ BUG_ON(!length);
+
+ data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
+ BUG_ON(!data);
+ data->pages = pages;
+ data->length = length;
+ data->alignment = alignment & ~PAGE_MASK;
+
+ list_add_tail(&data->links, &msg->data);
+ msg->data_length += length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pages);
+
+void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
+ struct ceph_pagelist *pagelist)
+{
+ struct ceph_msg_data *data;
+
+ BUG_ON(!pagelist);
+ BUG_ON(!pagelist->length);
+
+ data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
+ BUG_ON(!data);
+ data->pagelist = pagelist;
+
+ list_add_tail(&data->links, &msg->data);
+ msg->data_length += pagelist->length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
+
+#ifdef CONFIG_BLOCK
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
+ size_t length)
+{
+ struct ceph_msg_data *data;
+
+ BUG_ON(!bio);
+
+ data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
+ BUG_ON(!data);
+ data->bio = bio;
+ data->bio_length = length;
+
+ list_add_tail(&data->links, &msg->data);
+ msg->data_length += length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bio);
+#endif /* CONFIG_BLOCK */
/*
* construct a new message with given type, size
@@ -2682,49 +3113,20 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
{
struct ceph_msg *m;
- m = kmalloc(sizeof(*m), flags);
+ m = kmem_cache_zalloc(ceph_msg_cache, flags);
if (m == NULL)
goto out;
- kref_init(&m->kref);
- m->con = NULL;
- INIT_LIST_HEAD(&m->list_head);
-
- m->hdr.tid = 0;
m->hdr.type = cpu_to_le16(type);
m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
- m->hdr.version = 0;
m->hdr.front_len = cpu_to_le32(front_len);
- m->hdr.middle_len = 0;
- m->hdr.data_len = 0;
- m->hdr.data_off = 0;
- m->hdr.reserved = 0;
- m->footer.front_crc = 0;
- m->footer.middle_crc = 0;
- m->footer.data_crc = 0;
- m->footer.flags = 0;
- m->front_max = front_len;
- m->front_is_vmalloc = false;
- m->more_to_follow = false;
- m->ack_stamp = 0;
- m->pool = NULL;
-
- /* middle */
- m->middle = NULL;
- /* data */
- m->nr_pages = 0;
- m->page_alignment = 0;
- m->pages = NULL;
- m->pagelist = NULL;
-#ifdef CONFIG_BLOCK
- m->bio = NULL;
- m->bio_iter = NULL;
- m->bio_seg = 0;
-#endif /* CONFIG_BLOCK */
- m->trail = NULL;
+ INIT_LIST_HEAD(&m->list_head);
+ kref_init(&m->kref);
+ INIT_LIST_HEAD(&m->data);
/* front */
+ m->front_max = front_len;
if (front_len) {
if (front_len > PAGE_CACHE_SIZE) {
m->front.iov_base = __vmalloc(front_len, flags,
@@ -2802,49 +3204,37 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
{
struct ceph_msg_header *hdr = &con->in_hdr;
- int type = le16_to_cpu(hdr->type);
- int front_len = le32_to_cpu(hdr->front_len);
int middle_len = le32_to_cpu(hdr->middle_len);
+ struct ceph_msg *msg;
int ret = 0;
BUG_ON(con->in_msg != NULL);
+ BUG_ON(!con->ops->alloc_msg);
- if (con->ops->alloc_msg) {
- struct ceph_msg *msg;
-
- mutex_unlock(&con->mutex);
- msg = con->ops->alloc_msg(con, hdr, skip);
- mutex_lock(&con->mutex);
- if (con->state != CON_STATE_OPEN) {
- if (msg)
- ceph_msg_put(msg);
- return -EAGAIN;
- }
- con->in_msg = msg;
- if (con->in_msg) {
- con->in_msg->con = con->ops->get(con);
- BUG_ON(con->in_msg->con == NULL);
- }
- if (*skip) {
- con->in_msg = NULL;
- return 0;
- }
- if (!con->in_msg) {
- con->error_msg =
- "error allocating memory for incoming message";
- return -ENOMEM;
- }
+ mutex_unlock(&con->mutex);
+ msg = con->ops->alloc_msg(con, hdr, skip);
+ mutex_lock(&con->mutex);
+ if (con->state != CON_STATE_OPEN) {
+ if (msg)
+ ceph_msg_put(msg);
+ return -EAGAIN;
}
- if (!con->in_msg) {
- con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
- if (!con->in_msg) {
- pr_err("unable to allocate msg type %d len %d\n",
- type, front_len);
- return -ENOMEM;
- }
+ if (msg) {
+ BUG_ON(*skip);
+ con->in_msg = msg;
con->in_msg->con = con->ops->get(con);
BUG_ON(con->in_msg->con == NULL);
- con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
+ } else {
+ /*
+ * Null message pointer means either we should skip
+ * this message or we couldn't allocate memory. The
+ * former is not an error.
+ */
+ if (*skip)
+ return 0;
+ con->error_msg = "error allocating memory for incoming message";
+
+ return -ENOMEM;
}
memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
@@ -2870,7 +3260,7 @@ void ceph_msg_kfree(struct ceph_msg *m)
vfree(m->front.iov_base);
else
kfree(m->front.iov_base);
- kfree(m);
+ kmem_cache_free(ceph_msg_cache, m);
}
/*
@@ -2879,6 +3269,9 @@ void ceph_msg_kfree(struct ceph_msg *m)
void ceph_msg_last_put(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
+ LIST_HEAD(data);
+ struct list_head *links;
+ struct list_head *next;
dout("ceph_msg_put last one on %p\n", m);
WARN_ON(!list_empty(&m->list_head));
@@ -2888,16 +3281,16 @@ void ceph_msg_last_put(struct kref *kref)
ceph_buffer_put(m->middle);
m->middle = NULL;
}
- m->nr_pages = 0;
- m->pages = NULL;
- if (m->pagelist) {
- ceph_pagelist_release(m->pagelist);
- kfree(m->pagelist);
- m->pagelist = NULL;
- }
+ list_splice_init(&m->data, &data);
+ list_for_each_safe(links, next, &data) {
+ struct ceph_msg_data *data;
- m->trail = NULL;
+ data = list_entry(links, struct ceph_msg_data, links);
+ list_del_init(links);
+ ceph_msg_data_destroy(data);
+ }
+ m->data_length = 0;
if (m->pool)
ceph_msgpool_put(m->pool, m);
@@ -2908,8 +3301,8 @@ EXPORT_SYMBOL(ceph_msg_last_put);
void ceph_msg_dump(struct ceph_msg *msg)
{
- pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg,
- msg->front_max, msg->nr_pages);
+ pr_debug("msg_dump %p (front_max %d length %zd)\n", msg,
+ msg->front_max, msg->data_length);
print_hex_dump(KERN_DEBUG, "header: ",
DUMP_PREFIX_OFFSET, 16, 1,
&msg->hdr, sizeof(msg->hdr), true);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index aef5b1062bee..1fe25cd29d0e 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -737,7 +737,7 @@ static void delayed_work(struct work_struct *work)
__validate_auth(monc);
- if (monc->auth->ops->is_authenticated(monc->auth))
+ if (ceph_auth_is_authenticated(monc->auth))
__send_subscribe(monc);
}
__schedule_delayed(monc);
@@ -892,8 +892,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
mutex_lock(&monc->mutex);
had_debugfs_info = have_debugfs_info(monc);
- if (monc->auth->ops)
- was_auth = monc->auth->ops->is_authenticated(monc->auth);
+ was_auth = ceph_auth_is_authenticated(monc->auth);
monc->pending_auth = 0;
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
@@ -904,7 +903,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
wake_up_all(&monc->client->auth_wq);
} else if (ret > 0) {
__send_prepared_auth_request(monc, ret);
- } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
+ } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d730dd4d8eb2..dd47889adc4a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
+
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
@@ -21,6 +22,8 @@
#define OSD_OP_FRONT_LEN 4096
#define OSD_OPREPLY_FRONT_LEN 512
+static struct kmem_cache *ceph_osd_request_cache;
+
static const struct ceph_connection_operations osd_con_ops;
static void __send_queued(struct ceph_osd_client *osdc);
@@ -32,12 +35,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
static void __send_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req);
-static int op_has_extent(int op)
-{
- return (op == CEPH_OSD_OP_READ ||
- op == CEPH_OSD_OP_WRITE);
-}
-
/*
* Implement client access to distributed object storage cluster.
*
@@ -63,53 +60,238 @@ static int op_has_extent(int op)
*
* fill osd op in request message.
*/
-static int calc_layout(struct ceph_vino vino,
- struct ceph_file_layout *layout,
- u64 off, u64 *plen,
- struct ceph_osd_request *req,
- struct ceph_osd_req_op *op)
+static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
+ u64 *objnum, u64 *objoff, u64 *objlen)
{
u64 orig_len = *plen;
- u64 bno = 0;
- u64 objoff = 0;
- u64 objlen = 0;
int r;
/* object extent? */
- r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno,
- &objoff, &objlen);
+ r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+ objoff, objlen);
if (r < 0)
return r;
- if (objlen < orig_len) {
- *plen = objlen;
+ if (*objlen < orig_len) {
+ *plen = *objlen;
dout(" skipping last %llu, final file extent %llu~%llu\n",
orig_len - *plen, off, *plen);
}
- if (op_has_extent(op->op)) {
- u32 osize = le32_to_cpu(layout->fl_object_size);
- op->extent.offset = objoff;
- op->extent.length = objlen;
- if (op->extent.truncate_size <= off - objoff) {
- op->extent.truncate_size = 0;
- } else {
- op->extent.truncate_size -= off - objoff;
- if (op->extent.truncate_size > osize)
- op->extent.truncate_size = osize;
- }
+ dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
+
+ return 0;
+}
+
+static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
+{
+ memset(osd_data, 0, sizeof (*osd_data));
+ osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
+}
+
+static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
+ struct page **pages, u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;
+ osd_data->pages = pages;
+ osd_data->length = length;
+ osd_data->alignment = alignment;
+ osd_data->pages_from_pool = pages_from_pool;
+ osd_data->own_pages = own_pages;
+}
+
+static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
+ struct ceph_pagelist *pagelist)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST;
+ osd_data->pagelist = pagelist;
+}
+
+#ifdef CONFIG_BLOCK
+static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
+ struct bio *bio, size_t bio_length)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
+ osd_data->bio = bio;
+ osd_data->bio_length = bio_length;
+}
+#endif /* CONFIG_BLOCK */
+
+/*
+ * Yield a pointer to field "fld" of the "typ" member of op number
+ * "whch" in osd request "oreq"; BUG if "whch" is not below the
+ * request's r_num_ops.  The arguments are captured in locals so each
+ * is evaluated exactly once and an expression passed as "whch" cannot
+ * alter the bounds check through operator precedence.
+ */
+#define osd_req_op_data(oreq, whch, typ, fld)				\
+({									\
+	struct ceph_osd_request *__oreq = (oreq);			\
+	unsigned int __whch = (whch);					\
+	BUG_ON(__whch >= __oreq->r_num_ops);				\
+	&__oreq->r_ops[__whch].typ.fld;					\
+})
+
+static struct ceph_osd_data *
+osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
+{
+ BUG_ON(which >= osd_req->r_num_ops);
+
+ return &osd_req->r_ops[which].raw_data_in;
+}
+
+struct ceph_osd_data *
+osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ return osd_req_op_data(osd_req, which, extent, osd_data);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data);
+
+struct ceph_osd_data *
+osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ return osd_req_op_data(osd_req, which, cls, response_data);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
+
+void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_raw_data_in(osd_req, which);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
+
+void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
+
+void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
+
+#ifdef CONFIG_BLOCK
+void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
+ unsigned int which, struct bio *bio, size_t bio_length)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_bio_init(osd_data, bio, bio_length);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
+#endif /* CONFIG_BLOCK */
+
+static void osd_req_op_cls_request_info_pagelist(
+ struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_info);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+
+void osd_req_op_cls_request_data_pagelist(
+ struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
+
+void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
+
+void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, response_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data_pages);
+
+static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
+{
+ switch (osd_data->type) {
+ case CEPH_OSD_DATA_TYPE_NONE:
+ return 0;
+ case CEPH_OSD_DATA_TYPE_PAGES:
+ return osd_data->length;
+ case CEPH_OSD_DATA_TYPE_PAGELIST:
+ return (u64)osd_data->pagelist->length;
+#ifdef CONFIG_BLOCK
+ case CEPH_OSD_DATA_TYPE_BIO:
+ return (u64)osd_data->bio_length;
+#endif /* CONFIG_BLOCK */
+ default:
+ WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
+ return 0;
}
- req->r_num_pages = calc_pages_for(off, *plen);
- req->r_page_alignment = off & ~PAGE_MASK;
- if (op->op == CEPH_OSD_OP_WRITE)
- op->payload_len = *plen;
+}
- dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
- bno, objoff, objlen, req->r_num_pages);
+static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
+{
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
+ int num_pages;
- snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
- req->r_oid_len = strlen(req->r_oid);
+ num_pages = calc_pages_for((u64)osd_data->alignment,
+ (u64)osd_data->length);
+ ceph_release_page_vector(osd_data->pages, num_pages);
+ }
+ ceph_osd_data_init(osd_data);
+}
+
+static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ struct ceph_osd_req_op *op;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ op = &osd_req->r_ops[which];
- return r;
+ switch (op->op) {
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_WRITE:
+ ceph_osd_data_release(&op->extent.osd_data);
+ break;
+ case CEPH_OSD_OP_CALL:
+ ceph_osd_data_release(&op->cls.request_info);
+ ceph_osd_data_release(&op->cls.request_data);
+ ceph_osd_data_release(&op->cls.response_data);
+ break;
+ default:
+ break;
+ }
}
/*
@@ -117,30 +299,26 @@ static int calc_layout(struct ceph_vino vino,
*/
void ceph_osdc_release_request(struct kref *kref)
{
- struct ceph_osd_request *req = container_of(kref,
- struct ceph_osd_request,
- r_kref);
+ struct ceph_osd_request *req;
+ unsigned int which;
+ req = container_of(kref, struct ceph_osd_request, r_kref);
if (req->r_request)
ceph_msg_put(req->r_request);
- if (req->r_con_filling_msg) {
- dout("%s revoking msg %p from con %p\n", __func__,
- req->r_reply, req->r_con_filling_msg);
+ if (req->r_reply) {
ceph_msg_revoke_incoming(req->r_reply);
- req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
- req->r_con_filling_msg = NULL;
- }
- if (req->r_reply)
ceph_msg_put(req->r_reply);
- if (req->r_own_pages)
- ceph_release_page_vector(req->r_pages,
- req->r_num_pages);
+ }
+
+ for (which = 0; which < req->r_num_ops; which++)
+ osd_req_op_data_release(req, which);
+
ceph_put_snap_context(req->r_snapc);
- ceph_pagelist_release(&req->r_trail);
if (req->r_mempool)
mempool_free(req, req->r_osdc->req_mempool);
else
- kfree(req);
+ kmem_cache_free(ceph_osd_request_cache, req);
+
}
EXPORT_SYMBOL(ceph_osdc_release_request);
@@ -154,6 +332,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_msg *msg;
size_t msg_size;
+ BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
+ BUG_ON(num_ops > CEPH_OSD_MAX_OP);
+
msg_size = 4 + 4 + 8 + 8 + 4+8;
msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
msg_size += 1 + 8 + 4 + 4; /* pg_t */
@@ -168,13 +349,14 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
req = mempool_alloc(osdc->req_mempool, gfp_flags);
memset(req, 0, sizeof(*req));
} else {
- req = kzalloc(sizeof(*req), gfp_flags);
+ req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
}
if (req == NULL)
return NULL;
req->r_osdc = osdc;
req->r_mempool = use_mempool;
+ req->r_num_ops = num_ops;
kref_init(&req->r_kref);
init_completion(&req->r_completion);
@@ -198,8 +380,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
}
req->r_reply = msg;
- ceph_pagelist_init(&req->r_trail);
-
/* create request message; allow space for oid */
if (use_mempool)
msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -218,60 +398,24 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
}
EXPORT_SYMBOL(ceph_osdc_alloc_request);
-static void osd_req_encode_op(struct ceph_osd_request *req,
- struct ceph_osd_op *dst,
- struct ceph_osd_req_op *src)
+static bool osd_req_opcode_valid(u16 opcode)
{
- dst->op = cpu_to_le16(src->op);
-
- switch (src->op) {
- case CEPH_OSD_OP_STAT:
- break;
+ switch (opcode) {
case CEPH_OSD_OP_READ:
- case CEPH_OSD_OP_WRITE:
- dst->extent.offset =
- cpu_to_le64(src->extent.offset);
- dst->extent.length =
- cpu_to_le64(src->extent.length);
- dst->extent.truncate_size =
- cpu_to_le64(src->extent.truncate_size);
- dst->extent.truncate_seq =
- cpu_to_le32(src->extent.truncate_seq);
- break;
- case CEPH_OSD_OP_CALL:
- dst->cls.class_len = src->cls.class_len;
- dst->cls.method_len = src->cls.method_len;
- dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
-
- ceph_pagelist_append(&req->r_trail, src->cls.class_name,
- src->cls.class_len);
- ceph_pagelist_append(&req->r_trail, src->cls.method_name,
- src->cls.method_len);
- ceph_pagelist_append(&req->r_trail, src->cls.indata,
- src->cls.indata_len);
- break;
- case CEPH_OSD_OP_STARTSYNC:
- break;
- case CEPH_OSD_OP_NOTIFY_ACK:
- case CEPH_OSD_OP_WATCH:
- dst->watch.cookie = cpu_to_le64(src->watch.cookie);
- dst->watch.ver = cpu_to_le64(src->watch.ver);
- dst->watch.flag = src->watch.flag;
- break;
- default:
- pr_err("unrecognized osd opcode %d\n", dst->op);
- WARN_ON(1);
- break;
+ case CEPH_OSD_OP_STAT:
case CEPH_OSD_OP_MAPEXT:
case CEPH_OSD_OP_MASKTRUNC:
case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_NOTIFY:
+ case CEPH_OSD_OP_NOTIFY_ACK:
case CEPH_OSD_OP_ASSERT_VER:
+ case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
case CEPH_OSD_OP_TRUNCATE:
case CEPH_OSD_OP_ZERO:
case CEPH_OSD_OP_DELETE:
case CEPH_OSD_OP_APPEND:
+ case CEPH_OSD_OP_STARTSYNC:
case CEPH_OSD_OP_SETTRUNC:
case CEPH_OSD_OP_TRIMTRUNC:
case CEPH_OSD_OP_TMAPUP:
@@ -279,11 +423,11 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
case CEPH_OSD_OP_TMAPGET:
case CEPH_OSD_OP_CREATE:
case CEPH_OSD_OP_ROLLBACK:
+ case CEPH_OSD_OP_WATCH:
case CEPH_OSD_OP_OMAPGETKEYS:
case CEPH_OSD_OP_OMAPGETVALS:
case CEPH_OSD_OP_OMAPGETHEADER:
case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
- case CEPH_OSD_OP_MODE_RD:
case CEPH_OSD_OP_OMAPSETVALS:
case CEPH_OSD_OP_OMAPSETHEADER:
case CEPH_OSD_OP_OMAPCLEAR:
@@ -314,113 +458,233 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
case CEPH_OSD_OP_RDUNLOCK:
case CEPH_OSD_OP_UPLOCK:
case CEPH_OSD_OP_DNLOCK:
+ case CEPH_OSD_OP_CALL:
case CEPH_OSD_OP_PGLS:
case CEPH_OSD_OP_PGLS_FILTER:
- pr_err("unsupported osd opcode %s\n",
- ceph_osd_op_name(dst->op));
- WARN_ON(1);
- break;
+ return true;
+ default:
+ return false;
}
- dst->payload_len = cpu_to_le32(src->payload_len);
}
/*
- * build new request AND message
- *
+ * This is an osd op init function for opcodes that have no data or
+ * other information associated with them. It also serves as a
+ * common init routine for all the other init functions, below.
*/
-void ceph_osdc_build_request(struct ceph_osd_request *req,
- u64 off, u64 len, unsigned int num_ops,
- struct ceph_osd_req_op *src_ops,
- struct ceph_snap_context *snapc, u64 snap_id,
- struct timespec *mtime)
+static struct ceph_osd_req_op *
+_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode)
{
- struct ceph_msg *msg = req->r_request;
- struct ceph_osd_req_op *src_op;
- void *p;
- size_t msg_size;
- int flags = req->r_flags;
- u64 data_len;
- int i;
+ struct ceph_osd_req_op *op;
- req->r_num_ops = num_ops;
- req->r_snapid = snap_id;
- req->r_snapc = ceph_get_snap_context(snapc);
+ BUG_ON(which >= osd_req->r_num_ops);
+ BUG_ON(!osd_req_opcode_valid(opcode));
- /* encode request */
- msg->hdr.version = cpu_to_le16(4);
+ op = &osd_req->r_ops[which];
+ memset(op, 0, sizeof (*op));
+ op->op = opcode;
- p = msg->front.iov_base;
- ceph_encode_32(&p, 1); /* client_inc is always 1 */
- req->r_request_osdmap_epoch = p;
- p += 4;
- req->r_request_flags = p;
- p += 4;
- if (req->r_flags & CEPH_OSD_FLAG_WRITE)
- ceph_encode_timespec(p, mtime);
- p += sizeof(struct ceph_timespec);
- req->r_request_reassert_version = p;
- p += sizeof(struct ceph_eversion); /* will get filled in */
+ return op;
+}
- /* oloc */
- ceph_encode_8(&p, 4);
- ceph_encode_8(&p, 4);
- ceph_encode_32(&p, 8 + 4 + 4);
- req->r_request_pool = p;
- p += 8;
- ceph_encode_32(&p, -1); /* preferred */
- ceph_encode_32(&p, 0); /* key len */
+void osd_req_op_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode)
+{
+ (void)_osd_req_op_init(osd_req, which, opcode);
+}
+EXPORT_SYMBOL(osd_req_op_init);
- ceph_encode_8(&p, 1);
- req->r_request_pgid = p;
- p += 8 + 4;
- ceph_encode_32(&p, -1); /* preferred */
+void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 offset, u64 length,
+ u64 truncate_size, u32 truncate_seq)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ size_t payload_len = 0;
- /* oid */
- ceph_encode_32(&p, req->r_oid_len);
- memcpy(p, req->r_oid, req->r_oid_len);
- dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
- p += req->r_oid_len;
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
- /* ops */
- ceph_encode_16(&p, num_ops);
- src_op = src_ops;
- req->r_request_ops = p;
- for (i = 0; i < num_ops; i++, src_op++) {
- osd_req_encode_op(req, p, src_op);
- p += sizeof(struct ceph_osd_op);
- }
+ op->extent.offset = offset;
+ op->extent.length = length;
+ op->extent.truncate_size = truncate_size;
+ op->extent.truncate_seq = truncate_seq;
+ if (opcode == CEPH_OSD_OP_WRITE)
+ payload_len += length;
- /* snaps */
- ceph_encode_64(&p, req->r_snapid);
- ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
- ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
- if (req->r_snapc) {
- for (i = 0; i < snapc->num_snaps; i++) {
- ceph_encode_64(&p, req->r_snapc->snaps[i]);
- }
+ op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_extent_init);
+
+void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
+ unsigned int which, u64 length)
+{
+ struct ceph_osd_req_op *op;
+ u64 previous;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ op = &osd_req->r_ops[which];
+ previous = op->extent.length;
+
+ if (length == previous)
+ return; /* Nothing to do */
+ BUG_ON(length > previous);
+
+ op->extent.length = length;
+ op->payload_len -= previous - length;
+}
+EXPORT_SYMBOL(osd_req_op_extent_update);
+
+void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode, const char *class, const char *method)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_pagelist *pagelist;
+ size_t payload_len = 0;
+ size_t size;
+
+ BUG_ON(opcode != CEPH_OSD_OP_CALL);
+
+ pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+ BUG_ON(!pagelist);
+ ceph_pagelist_init(pagelist);
+
+ op->cls.class_name = class;
+ size = strlen(class);
+ BUG_ON(size > (size_t) U8_MAX);
+ op->cls.class_len = size;
+ ceph_pagelist_append(pagelist, class, size);
+ payload_len += size;
+
+ op->cls.method_name = method;
+ size = strlen(method);
+ BUG_ON(size > (size_t) U8_MAX);
+ op->cls.method_len = size;
+ ceph_pagelist_append(pagelist, method, size);
+ payload_len += size;
+
+ osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
+
+ op->cls.argc = 0; /* currently unused */
+
+ op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_cls_init);
+
+void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 cookie, u64 version, int flag)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+
+ BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
+
+ op->watch.cookie = cookie;
+ op->watch.ver = version;
+ if (opcode == CEPH_OSD_OP_WATCH && flag)
+ op->watch.flag = (u8)1;
+}
+EXPORT_SYMBOL(osd_req_op_watch_init);
+
+static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
+ struct ceph_osd_data *osd_data)
+{
+ u64 length = ceph_osd_data_length(osd_data);
+
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+ BUG_ON(length > (u64) SIZE_MAX);
+ if (length)
+ ceph_msg_data_add_pages(msg, osd_data->pages,
+ length, osd_data->alignment);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
+ BUG_ON(!length);
+ ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
+#ifdef CONFIG_BLOCK
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
+ ceph_msg_data_add_bio(msg, osd_data->bio, length);
+#endif
+ } else {
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
}
+}
- req->r_request_attempts = p;
- p += 4;
+static u64 osd_req_encode_op(struct ceph_osd_request *req,
+ struct ceph_osd_op *dst, unsigned int which)
+{
+ struct ceph_osd_req_op *src;
+ struct ceph_osd_data *osd_data;
+ u64 request_data_len = 0;
+ u64 data_length;
- data_len = req->r_trail.length;
- if (flags & CEPH_OSD_FLAG_WRITE) {
- req->r_request->hdr.data_off = cpu_to_le16(off);
- data_len += len;
+ BUG_ON(which >= req->r_num_ops);
+ src = &req->r_ops[which];
+ if (WARN_ON(!osd_req_opcode_valid(src->op))) {
+ pr_err("unrecognized osd opcode %d\n", src->op);
+
+ return 0;
}
- req->r_request->hdr.data_len = cpu_to_le32(data_len);
- req->r_request->page_alignment = req->r_page_alignment;
- BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
- msg_size = p - msg->front.iov_base;
- msg->front.iov_len = msg_size;
- msg->hdr.front_len = cpu_to_le32(msg_size);
+ switch (src->op) {
+ case CEPH_OSD_OP_STAT:
+ osd_data = &src->raw_data_in;
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_WRITE:
+ if (src->op == CEPH_OSD_OP_WRITE)
+ request_data_len = src->extent.length;
+ dst->extent.offset = cpu_to_le64(src->extent.offset);
+ dst->extent.length = cpu_to_le64(src->extent.length);
+ dst->extent.truncate_size =
+ cpu_to_le64(src->extent.truncate_size);
+ dst->extent.truncate_seq =
+ cpu_to_le32(src->extent.truncate_seq);
+ osd_data = &src->extent.osd_data;
+ if (src->op == CEPH_OSD_OP_WRITE)
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ else
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_CALL:
+ dst->cls.class_len = src->cls.class_len;
+ dst->cls.method_len = src->cls.method_len;
+ osd_data = &src->cls.request_info;
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST);
+ request_data_len = osd_data->pagelist->length;
+
+ osd_data = &src->cls.request_data;
+ data_length = ceph_osd_data_length(osd_data);
+ if (data_length) {
+ BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
+ dst->cls.indata_len = cpu_to_le32(data_length);
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ src->payload_len += data_length;
+ request_data_len += data_length;
+ }
+ osd_data = &src->cls.response_data;
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_STARTSYNC:
+ break;
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_WATCH:
+ dst->watch.cookie = cpu_to_le64(src->watch.cookie);
+ dst->watch.ver = cpu_to_le64(src->watch.ver);
+ dst->watch.flag = src->watch.flag;
+ break;
+ default:
+ pr_err("unsupported osd opcode %s\n",
+ ceph_osd_op_name(src->op));
+ WARN_ON(1);
- dout("build_request msg_size was %d num_ops %d\n", (int)msg_size,
- num_ops);
- return;
+ return 0;
+ }
+ dst->op = cpu_to_le16(src->op);
+ dst->payload_len = cpu_to_le32(src->payload_len);
+
+ return request_data_len;
}
-EXPORT_SYMBOL(ceph_osdc_build_request);
/*
* build new request AND message, calculate layout, and adjust file
@@ -436,51 +700,65 @@ EXPORT_SYMBOL(ceph_osdc_build_request);
struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout,
struct ceph_vino vino,
- u64 off, u64 *plen,
+ u64 off, u64 *plen, int num_ops,
int opcode, int flags,
struct ceph_snap_context *snapc,
- int do_sync,
u32 truncate_seq,
u64 truncate_size,
- struct timespec *mtime,
- bool use_mempool,
- int page_align)
+ bool use_mempool)
{
- struct ceph_osd_req_op ops[2];
struct ceph_osd_request *req;
- unsigned int num_op = 1;
+ u64 objnum = 0;
+ u64 objoff = 0;
+ u64 objlen = 0;
+ u32 object_size;
+ u64 object_base;
int r;
- memset(&ops, 0, sizeof ops);
-
- ops[0].op = opcode;
- ops[0].extent.truncate_seq = truncate_seq;
- ops[0].extent.truncate_size = truncate_size;
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
- if (do_sync) {
- ops[1].op = CEPH_OSD_OP_STARTSYNC;
- num_op++;
- }
-
- req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool,
+ req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
if (!req)
return ERR_PTR(-ENOMEM);
+
req->r_flags = flags;
/* calculate max write size */
- r = calc_layout(vino, layout, off, plen, req, ops);
- if (r < 0)
+ r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen);
+ if (r < 0) {
+ ceph_osdc_put_request(req);
return ERR_PTR(r);
- req->r_file_layout = *layout; /* keep a copy */
+ }
+
+ object_size = le32_to_cpu(layout->fl_object_size);
+ object_base = off - objoff;
+ if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+ if (truncate_size <= object_base) {
+ truncate_size = 0;
+ } else {
+ truncate_size -= object_base;
+ if (truncate_size > object_size)
+ truncate_size = object_size;
+ }
+ }
+
+ osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
+ truncate_size, truncate_seq);
+
+ /*
+ * A second op in the ops array means the caller wants to
+ * also include a 'startsync' command so that the
+ * osd will flush data quickly.
+ */
+ if (num_ops > 1)
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
- /* in case it differs from natural (file) alignment that
- calc_layout filled in for us */
- req->r_num_pages = calc_pages_for(page_align, *plen);
- req->r_page_alignment = page_align;
+ req->r_file_layout = *layout; /* keep a copy */
- ceph_osdc_build_request(req, off, *plen, num_op, ops,
- snapc, vino.snap, mtime);
+ snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx",
+ vino.ino, objnum);
+ req->r_oid_len = strlen(req->r_oid);
return req;
}
@@ -558,21 +836,46 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
struct ceph_osd *osd)
{
struct ceph_osd_request *req, *nreq;
+ LIST_HEAD(resend);
int err;
dout("__kick_osd_requests osd%d\n", osd->o_osd);
err = __reset_osd(osdc, osd);
if (err)
return;
-
+ /*
+ * Build up a list of requests to resend by traversing the
+ * osd's list of requests. Requests for a given object are
+ * sent in tid order, and that is also the order they're
+ * kept on this list. Therefore all requests that are in
+ * flight will be found first, followed by all requests that
+ * have not yet been sent. And to resend requests while
+ * preserving this order we will want to put any sent
+ * requests back on the front of the osd client's unsent
+ * list.
+ *
+ * So we build a separate ordered list of already-sent
+ * requests for the affected osd and splice it onto the
+ * front of the osd client's unsent list. Once we've seen a
+ * request that has not yet been sent we're done. Those
+ * requests are already sitting right where they belong.
+ */
list_for_each_entry(req, &osd->o_requests, r_osd_item) {
- list_move(&req->r_req_lru_item, &osdc->req_unsent);
- dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
+ if (!req->r_sent)
+ break;
+ list_move_tail(&req->r_req_lru_item, &resend);
+ dout("requeueing %p tid %llu osd%d\n", req, req->r_tid,
osd->o_osd);
if (!req->r_linger)
req->r_flags |= CEPH_OSD_FLAG_RETRY;
}
+ list_splice(&resend, &osdc->req_unsent);
+ /*
+ * Linger requests are re-registered before sending, which
+ * sets up a new tid for each. We add them to the unsent
+ * list at the end to keep things in tid order.
+ */
list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
r_linger_osd) {
/*
@@ -581,8 +884,8 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
*/
BUG_ON(!list_empty(&req->r_req_lru_item));
__register_request(osdc, req);
- list_add(&req->r_req_lru_item, &osdc->req_unsent);
- list_add(&req->r_osd_item, &req->r_osd->o_requests);
+ list_add_tail(&req->r_req_lru_item, &osdc->req_unsent);
+ list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
__unregister_linger_request(osdc, req);
dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
osd->o_osd);
@@ -654,8 +957,7 @@ static void put_osd(struct ceph_osd *osd)
if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
- if (ac->ops && ac->ops->destroy_authorizer)
- ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
+ ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
kfree(osd);
}
}
@@ -820,14 +1122,6 @@ static void __register_request(struct ceph_osd_client *osdc,
}
}
-static void register_request(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req)
-{
- mutex_lock(&osdc->request_mutex);
- __register_request(osdc, req);
- mutex_unlock(&osdc->request_mutex);
-}
-
/*
* called under osdc->request_mutex
*/
@@ -882,6 +1176,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
dout("__register_linger_request %p\n", req);
+ ceph_osdc_get_request(req);
list_add_tail(&req->r_linger_item, &osdc->req_linger);
if (req->r_osd)
list_add_tail(&req->r_linger_osd,
@@ -904,6 +1199,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
if (list_empty(&req->r_osd_item))
req->r_osd = NULL;
}
+ ceph_osdc_put_request(req);
}
void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
@@ -911,8 +1207,8 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
{
mutex_lock(&osdc->request_mutex);
if (req->r_linger) {
+ req->r_linger = 0;
__unregister_linger_request(osdc, req);
- ceph_osdc_put_request(req);
}
mutex_unlock(&osdc->request_mutex);
}
@@ -924,11 +1220,6 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
if (!req->r_linger) {
dout("set_request_linger %p\n", req);
req->r_linger = 1;
- /*
- * caller is now responsible for calling
- * unregister_linger_request
- */
- ceph_osdc_get_request(req);
}
}
EXPORT_SYMBOL(ceph_osdc_set_request_linger);
@@ -952,8 +1243,8 @@ static int __map_request(struct ceph_osd_client *osdc,
int err;
dout("map_request %p tid %lld\n", req, req->r_tid);
- err = ceph_calc_object_layout(&pgid, req->r_oid,
- &req->r_file_layout, osdc->osdmap);
+ err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap,
+ ceph_file_layout_pg_pool(req->r_file_layout));
if (err) {
list_move(&req->r_req_lru_item, &osdc->req_notarget);
return err;
@@ -1007,10 +1298,10 @@ static int __map_request(struct ceph_osd_client *osdc,
if (req->r_osd) {
__remove_osd_from_lru(req->r_osd);
- list_add(&req->r_osd_item, &req->r_osd->o_requests);
- list_move(&req->r_req_lru_item, &osdc->req_unsent);
+ list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
+ list_move_tail(&req->r_req_lru_item, &osdc->req_unsent);
} else {
- list_move(&req->r_req_lru_item, &osdc->req_notarget);
+ list_move_tail(&req->r_req_lru_item, &osdc->req_notarget);
}
err = 1; /* osd or pg changed */
@@ -1045,8 +1336,10 @@ static void __send_request(struct ceph_osd_client *osdc,
list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
ceph_msg_get(req->r_request); /* send consumes a ref */
- ceph_con_send(&req->r_osd->o_con, req->r_request);
+
req->r_sent = req->r_osd->o_incarnation;
+
+ ceph_con_send(&req->r_osd->o_con, req->r_request);
}
/*
@@ -1134,31 +1427,9 @@ static void handle_osds_timeout(struct work_struct *work)
static void complete_request(struct ceph_osd_request *req)
{
- if (req->r_safe_callback)
- req->r_safe_callback(req, NULL);
complete_all(&req->r_safe_completion); /* fsync waiter */
}
-static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid)
-{
- __u8 v;
-
- ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad);
- v = ceph_decode_8(p);
- if (v > 1) {
- pr_warning("do not understand pg encoding %d > 1", v);
- return -EINVAL;
- }
- pgid->pool = ceph_decode_64(p);
- pgid->seed = ceph_decode_32(p);
- *p += 4;
- return 0;
-
-bad:
- pr_warning("incomplete pg encoding");
- return -EINVAL;
-}
-
/*
* handle osd op reply. either call the callback if it is specified,
* or do the completion to wake up the waiting thread.
@@ -1170,7 +1441,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
struct ceph_osd_request *req;
u64 tid;
int object_len;
- int numops, payload_len, flags;
+ unsigned int numops;
+ int payload_len, flags;
s32 result;
s32 retry_attempt;
struct ceph_pg pg;
@@ -1178,7 +1450,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
u32 reassert_epoch;
u64 reassert_version;
u32 osdmap_epoch;
- int i;
+ int already_completed;
+ u32 bytes;
+ unsigned int i;
tid = le64_to_cpu(msg->hdr.tid);
dout("handle_reply %p tid %llu\n", msg, tid);
@@ -1191,7 +1465,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
ceph_decode_need(&p, end, object_len, bad);
p += object_len;
- err = __decode_pgid(&p, end, &pg);
+ err = ceph_decode_pgid(&p, end, &pg);
if (err)
goto bad;
@@ -1207,8 +1481,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
req = __lookup_request(osdc, tid);
if (req == NULL) {
dout("handle_reply tid %llu dne\n", tid);
- mutex_unlock(&osdc->request_mutex);
- return;
+ goto bad_mutex;
}
ceph_osdc_get_request(req);
@@ -1233,9 +1506,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
payload_len += len;
p += sizeof(*op);
}
- if (payload_len != le32_to_cpu(msg->hdr.data_len)) {
+ bytes = le32_to_cpu(msg->hdr.data_len);
+ if (payload_len != bytes) {
pr_warning("sum of op payload lens %d != data_len %d",
- payload_len, le32_to_cpu(msg->hdr.data_len));
+ payload_len, bytes);
goto bad_put;
}
@@ -1244,21 +1518,11 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
for (i = 0; i < numops; i++)
req->r_reply_op_result[i] = ceph_decode_32(&p);
- /*
- * if this connection filled our message, drop our reference now, to
- * avoid a (safe but slower) revoke later.
- */
- if (req->r_con_filling_msg == con && req->r_reply == msg) {
- dout(" dropping con_filling_msg ref %p\n", con);
- req->r_con_filling_msg = NULL;
- con->ops->put(con);
- }
+ already_completed = req->r_got_reply;
if (!req->r_got_reply) {
- unsigned int bytes;
req->r_result = result;
- bytes = le32_to_cpu(msg->hdr.data_len);
dout("handle_reply result %d bytes %d\n", req->r_result,
bytes);
if (req->r_result == 0)
@@ -1288,13 +1552,21 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
mutex_unlock(&osdc->request_mutex);
- if (req->r_callback)
- req->r_callback(req, msg);
- else
- complete_all(&req->r_completion);
+ if (!already_completed) {
+ if (req->r_unsafe_callback &&
+ result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
+ req->r_unsafe_callback(req, true);
+ if (req->r_callback)
+ req->r_callback(req, msg);
+ else
+ complete_all(&req->r_completion);
+ }
- if (flags & CEPH_OSD_FLAG_ONDISK)
+ if (flags & CEPH_OSD_FLAG_ONDISK) {
+ if (req->r_unsafe_callback && already_completed)
+ req->r_unsafe_callback(req, false);
complete_request(req);
+ }
done:
dout("req=%p req->r_linger=%d\n", req, req->r_linger);
@@ -1303,6 +1575,8 @@ done:
bad_put:
ceph_osdc_put_request(req);
+bad_mutex:
+ mutex_unlock(&osdc->request_mutex);
bad:
pr_err("corrupt osd_op_reply got %d %d\n",
(int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
@@ -1357,8 +1631,10 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("%p tid %llu restart on osd%d\n",
req, req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1);
+ ceph_osdc_get_request(req);
__unregister_request(osdc, req);
__register_linger_request(osdc, req);
+ ceph_osdc_put_request(req);
continue;
}
@@ -1399,13 +1675,13 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
__register_request(osdc, req);
__unregister_linger_request(osdc, req);
}
+ reset_changed_osds(osdc);
mutex_unlock(&osdc->request_mutex);
if (needmap) {
dout("%d requests for down osds, need new map\n", needmap);
ceph_monc_request_next_osdmap(&osdc->client->monc);
}
- reset_changed_osds(osdc);
}
@@ -1736,6 +2012,104 @@ bad:
}
/*
+ * build new request AND message
+ *
+ * Encode the front section of req->r_request from state already held
+ * in req: a fixed header, object locator and pgid placeholders, the
+ * object name, one struct ceph_osd_op per op (via
+ * osd_req_encode_op()), and the snapshot context.
+ *
+ * @req: request to encode; r_flags, r_oid, r_oid_len and r_num_ops
+ *       are read, r_snapid and r_snapc are set here
+ * @off: offset whose low 16 bits become the header's data_off hint;
+ *       only used when CEPH_OSD_FLAG_WRITE is set in r_flags
+ * @snapc: snapshot context (may be NULL); a reference is taken with
+ *       ceph_get_snap_context() and stored in req->r_snapc
+ * @snap_id: stored in req->r_snapid and encoded into the message
+ * @mtime: encoded only when CEPH_OSD_FLAG_WRITE is set in r_flags
+ *
+ * Several fields are not written here; only their position in the
+ * message is recorded in req (r_request_osdmap_epoch,
+ * r_request_flags, r_request_reassert_version, r_request_pool,
+ * r_request_pgid and r_request_attempts).  Presumably they are
+ * filled in when the request is actually sent -- TODO confirm
+ * against the send path.
+ *
+ * On return front.iov_len and hdr.front_len are trimmed to the
+ * number of bytes actually encoded, and hdr.data_len holds the sum
+ * of the values returned by osd_req_encode_op().
+ */
+void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
+ struct ceph_snap_context *snapc, u64 snap_id,
+ struct timespec *mtime)
+{
+ struct ceph_msg *msg = req->r_request;
+ void *p;
+ size_t msg_size;
+ int flags = req->r_flags;
+ u64 data_len;
+ unsigned int i;
+
+ req->r_snapid = snap_id;
+ req->r_snapc = ceph_get_snap_context(snapc);
+
+ /* encode request */
+ msg->hdr.version = cpu_to_le16(4);
+
+ p = msg->front.iov_base;
+ ceph_encode_32(&p, 1); /* client_inc is always 1 */
+ req->r_request_osdmap_epoch = p;
+ p += 4;
+ req->r_request_flags = p;
+ p += 4;
+ if (req->r_flags & CEPH_OSD_FLAG_WRITE)
+ ceph_encode_timespec(p, mtime);
+ p += sizeof(struct ceph_timespec);
+ req->r_request_reassert_version = p;
+ p += sizeof(struct ceph_eversion); /* will get filled in */
+
+ /* oloc */
+ ceph_encode_8(&p, 4);
+ ceph_encode_8(&p, 4);
+ ceph_encode_32(&p, 8 + 4 + 4);
+ req->r_request_pool = p;
+ p += 8;
+ ceph_encode_32(&p, -1); /* preferred */
+ ceph_encode_32(&p, 0); /* key len */
+
+ ceph_encode_8(&p, 1);
+ req->r_request_pgid = p;
+ p += 8 + 4;
+ ceph_encode_32(&p, -1); /* preferred */
+
+ /* oid */
+ ceph_encode_32(&p, req->r_oid_len);
+ memcpy(p, req->r_oid, req->r_oid_len);
+ dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
+ p += req->r_oid_len;
+
+ /* ops--can imply data */
+ ceph_encode_16(&p, (u16)req->r_num_ops);
+ data_len = 0;
+ for (i = 0; i < req->r_num_ops; i++) {
+ data_len += osd_req_encode_op(req, p, i);
+ p += sizeof(struct ceph_osd_op);
+ }
+
+ /* snaps; snapc is the same pointer as req->r_snapc (set above) */
+ ceph_encode_64(&p, req->r_snapid);
+ ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
+ ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
+ if (req->r_snapc) {
+ for (i = 0; i < snapc->num_snaps; i++) {
+ ceph_encode_64(&p, req->r_snapc->snaps[i]);
+ }
+ }
+
+ req->r_request_attempts = p;
+ p += 4;
+
+ /* data */
+ if (flags & CEPH_OSD_FLAG_WRITE) {
+ u16 data_off;
+
+ /*
+ * The header "data_off" is a hint to the receiver
+ * allowing it to align received data into its
+ * buffers such that there's no need to re-copy
+ * it before writing it to disk (direct I/O).
+ */
+ data_off = (u16) (off & 0xffff);
+ req->r_request->hdr.data_off = cpu_to_le16(data_off);
+ }
+ req->r_request->hdr.data_len = cpu_to_le32(data_len);
+
+ BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
+ msg_size = p - msg->front.iov_base;
+ msg->front.iov_len = msg_size;
+ msg->hdr.front_len = cpu_to_le32(msg_size);
+
+ dout("build_request msg_size was %d\n", (int)msg_size);
+}
+EXPORT_SYMBOL(ceph_osdc_build_request);
+
+/*
* Register request, send initial attempt.
*/
int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1744,41 +2118,27 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
{
int rc = 0;
- req->r_request->pages = req->r_pages;
- req->r_request->nr_pages = req->r_num_pages;
-#ifdef CONFIG_BLOCK
- req->r_request->bio = req->r_bio;
-#endif
- req->r_request->trail = &req->r_trail;
-
- register_request(osdc, req);
-
down_read(&osdc->map_sem);
mutex_lock(&osdc->request_mutex);
- /*
- * a racing kick_requests() may have sent the message for us
- * while we dropped request_mutex above, so only send now if
- * the request still han't been touched yet.
- */
- if (req->r_sent == 0) {
- rc = __map_request(osdc, req, 0);
- if (rc < 0) {
- if (nofail) {
- dout("osdc_start_request failed map, "
- " will retry %lld\n", req->r_tid);
- rc = 0;
- }
- goto out_unlock;
- }
- if (req->r_osd == NULL) {
- dout("send_request %p no up osds in pg\n", req);
- ceph_monc_request_next_osdmap(&osdc->client->monc);
- } else {
- __send_request(osdc, req);
+ __register_request(osdc, req);
+ req->r_sent = 0;
+ req->r_got_reply = 0;
+ rc = __map_request(osdc, req, 0);
+ if (rc < 0) {
+ if (nofail) {
+ dout("osdc_start_request failed map, "
+ " will retry %lld\n", req->r_tid);
+ rc = 0;
}
- rc = 0;
+ goto out_unlock;
}
-
+ if (req->r_osd == NULL) {
+ dout("send_request %p no up osds in pg\n", req);
+ ceph_monc_request_next_osdmap(&osdc->client->monc);
+ } else {
+ __send_queued(osdc);
+ }
+ rc = 0;
out_unlock:
mutex_unlock(&osdc->request_mutex);
up_read(&osdc->map_sem);
@@ -1940,18 +2300,22 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
vino.snap, off, *plen);
- req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
+ req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
- NULL, 0, truncate_seq, truncate_size, NULL,
- false, page_align);
+ NULL, truncate_seq, truncate_size,
+ false);
if (IS_ERR(req))
return PTR_ERR(req);
/* it may be a short read due to an object boundary */
- req->r_pages = pages;
- dout("readpages final extent is %llu~%llu (%d pages align %d)\n",
- off, *plen, req->r_num_pages, page_align);
+ osd_req_op_extent_osd_data_pages(req, 0,
+ pages, *plen, page_align, false, false);
+
+ dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
+ off, *plen, *plen, page_align);
+
+ ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
rc = ceph_osdc_start_request(osdc, req, false);
if (!rc)
@@ -1978,20 +2342,21 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
int rc = 0;
int page_align = off & ~PAGE_MASK;
- BUG_ON(vino.snap != CEPH_NOSNAP);
- req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
+ BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
+ req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
- snapc, 0,
- truncate_seq, truncate_size, mtime,
- true, page_align);
+ snapc, truncate_seq, truncate_size,
+ true);
if (IS_ERR(req))
return PTR_ERR(req);
/* it may be a short write due to an object boundary */
- req->r_pages = pages;
- dout("writepages %llu~%llu (%d pages)\n", off, len,
- req->r_num_pages);
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+ false, false);
+ dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
+
+ ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);
rc = ceph_osdc_start_request(osdc, req, true);
if (!rc)
@@ -2005,6 +2370,26 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
}
EXPORT_SYMBOL(ceph_osdc_writepages);
+int ceph_osdc_setup(void)
+{
+ BUG_ON(ceph_osd_request_cache);
+ ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
+ sizeof (struct ceph_osd_request),
+ __alignof__(struct ceph_osd_request),
+ 0, NULL);
+
+ return ceph_osd_request_cache ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(ceph_osdc_setup);
+
+void ceph_osdc_cleanup(void)
+{
+ BUG_ON(!ceph_osd_request_cache);
+ kmem_cache_destroy(ceph_osd_request_cache);
+ ceph_osd_request_cache = NULL;
+}
+EXPORT_SYMBOL(ceph_osdc_cleanup);
+
/*
* handle incoming message
*/
@@ -2064,17 +2449,16 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
goto out;
}
- if (req->r_con_filling_msg) {
+ if (req->r_reply->con)
dout("%s revoking msg %p from old con %p\n", __func__,
- req->r_reply, req->r_con_filling_msg);
- ceph_msg_revoke_incoming(req->r_reply);
- req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
- req->r_con_filling_msg = NULL;
- }
+ req->r_reply, req->r_reply->con);
+ ceph_msg_revoke_incoming(req->r_reply);
if (front > req->r_reply->front.iov_len) {
- pr_warning("get_reply front %d > preallocated %d\n",
- front, (int)req->r_reply->front.iov_len);
+ pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n",
+ front, (int)req->r_reply->front.iov_len,
+ (unsigned int)con->peer_name.type,
+ le64_to_cpu(con->peer_name.num));
m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false);
if (!m)
goto out;
@@ -2084,26 +2468,29 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
m = ceph_msg_get(req->r_reply);
if (data_len > 0) {
- int want = calc_pages_for(req->r_page_alignment, data_len);
-
- if (req->r_pages && unlikely(req->r_num_pages < want)) {
- pr_warning("tid %lld reply has %d bytes %d pages, we"
- " had only %d pages ready\n", tid, data_len,
- want, req->r_num_pages);
- *skip = 1;
- ceph_msg_put(m);
- m = NULL;
- goto out;
+ struct ceph_osd_data *osd_data;
+
+ /*
+ * XXX This is assuming there is only one op containing
+ * XXX page data. Probably OK for reads, but this
+ * XXX ought to be done more generally.
+ */
+ osd_data = osd_req_op_extent_osd_data(req, 0);
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+ if (osd_data->pages &&
+ unlikely(osd_data->length < data_len)) {
+
+ pr_warning("tid %lld reply has %d bytes "
+ "we had only %llu bytes ready\n",
+ tid, data_len, osd_data->length);
+ *skip = 1;
+ ceph_msg_put(m);
+ m = NULL;
+ goto out;
+ }
}
- m->pages = req->r_pages;
- m->nr_pages = req->r_num_pages;
- m->page_alignment = req->r_page_alignment;
-#ifdef CONFIG_BLOCK
- m->bio = req->r_bio;
-#endif
}
*skip = 0;
- req->r_con_filling_msg = con->ops->get(con);
dout("get_reply tid %lld %p\n", tid, m);
out:
@@ -2168,13 +2555,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_auth_handshake *auth = &o->o_auth;
if (force_new && auth->authorizer) {
- if (ac->ops && ac->ops->destroy_authorizer)
- ac->ops->destroy_authorizer(ac, auth->authorizer);
+ ceph_auth_destroy_authorizer(ac, auth->authorizer);
auth->authorizer = NULL;
}
- if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
- int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
- auth);
+ if (!auth->authorizer) {
+ int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
+ auth);
+ if (ret)
+ return ERR_PTR(ret);
+ } else {
+ int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
+ auth);
if (ret)
return ERR_PTR(ret);
}
@@ -2190,11 +2581,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
- /*
- * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
- * XXX which do we do: succeed or fail?
- */
- return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
+ return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
}
static int invalidate_authorizer(struct ceph_connection *con)
@@ -2203,9 +2590,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
- if (ac->ops && ac->ops->invalidate_authorizer)
- ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
-
+ ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
return ceph_monc_validate_auth(&osdc->client->monc);
}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4543b9aba40c..603ddd92db19 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -654,24 +654,6 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
return 0;
}
-static int __decode_pgid(void **p, void *end, struct ceph_pg *pg)
-{
- u8 v;
-
- ceph_decode_need(p, end, 1+8+4+4, bad);
- v = ceph_decode_8(p);
- if (v != 1)
- goto bad;
- pg->pool = ceph_decode_64(p);
- pg->seed = ceph_decode_32(p);
- *p += 4; /* skip preferred */
- return 0;
-
-bad:
- dout("error decoding pgid\n");
- return -EINVAL;
-}
-
/*
* decode a full map.
*/
@@ -765,7 +747,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
struct ceph_pg pgid;
struct ceph_pg_mapping *pg;
- err = __decode_pgid(p, end, &pgid);
+ err = ceph_decode_pgid(p, end, &pgid);
if (err)
goto bad;
ceph_decode_need(p, end, sizeof(u32), bad);
@@ -983,7 +965,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_pg pgid;
u32 pglen;
- err = __decode_pgid(p, end, &pgid);
+ err = ceph_decode_pgid(p, end, &pgid);
if (err)
goto bad;
ceph_decode_need(p, end, sizeof(u32), bad);
@@ -1111,27 +1093,22 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
* calculate an object layout (i.e. pgid) from an oid,
* file_layout, and osdmap
*/
-int ceph_calc_object_layout(struct ceph_pg *pg,
- const char *oid,
- struct ceph_file_layout *fl,
- struct ceph_osdmap *osdmap)
+int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
+ struct ceph_osdmap *osdmap, uint64_t pool)
{
- unsigned int num, num_mask;
- struct ceph_pg_pool_info *pool;
+ struct ceph_pg_pool_info *pool_info;
BUG_ON(!osdmap);
- pg->pool = le32_to_cpu(fl->fl_pg_pool);
- pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool);
- if (!pool)
+ pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool);
+ if (!pool_info)
return -EIO;
- pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
- num = pool->pg_num;
- num_mask = pool->pg_num_mask;
+ pg->pool = pool;
+ pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid));
- dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed);
+ dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed);
return 0;
}
-EXPORT_SYMBOL(ceph_calc_object_layout);
+EXPORT_SYMBOL(ceph_calc_ceph_pg);
/*
* Calculate raw osd vector for the given pgid. Return pointer to osd
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
new file mode 100644
index 000000000000..154683f5f14c
--- /dev/null
+++ b/net/ceph/snapshot.c
@@ -0,0 +1,78 @@
+/*
+ * snapshot.c Ceph snapshot context utility routines (part of libceph)
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stddef.h>
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/ceph/libceph.h>
+
+/*
+ * Ceph snapshot contexts are reference counted objects, and the
+ * returned structure holds a single reference. Acquire additional
+ * references with ceph_get_snap_context(), and release them with
+ * ceph_put_snap_context(). When the reference count reaches zero
+ * the entire structure is freed.
+ */
+
+/*
+ * Create a new ceph snapshot context large enough to hold the
+ * indicated number of snapshot ids (which can be 0). Caller has
+ * to fill in snapc->seq and snapc->snaps[0..snap_count-1].
+ *
+ * @snap_count: number of entries to reserve in snapc->snaps[]
+ * @gfp_flags: allocation flags handed straight to kzalloc()
+ *
+ * The structure is zero-filled by kzalloc(); its reference count
+ * starts at 1 and num_snaps is set to snap_count.
+ *
+ * Returns a null pointer if an error occurs.
+ *
+ * NOTE(review): the allocation size is computed as
+ * sizeof(*snapc) + snap_count * sizeof(snapc->snaps[0]) with no
+ * overflow check; confirm callers bound snap_count, particularly
+ * where size_t is 32 bits wide.
+ */
+struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
+ gfp_t gfp_flags)
+{
+ struct ceph_snap_context *snapc;
+ size_t size;
+
+ size = sizeof (struct ceph_snap_context);
+ size += snap_count * sizeof (snapc->snaps[0]);
+ snapc = kzalloc(size, gfp_flags);
+ if (!snapc)
+ return NULL;
+
+ atomic_set(&snapc->nref, 1);
+ snapc->num_snaps = snap_count;
+
+ return snapc;
+}
+EXPORT_SYMBOL(ceph_create_snap_context);
+
+struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
+{
+ if (sc)
+ atomic_inc(&sc->nref);
+ return sc;
+}
+EXPORT_SYMBOL(ceph_get_snap_context);
+
+void ceph_put_snap_context(struct ceph_snap_context *sc)
+{
+ if (!sc)
+ return;
+ if (atomic_dec_and_test(&sc->nref)) {
+ /* nref dropped to zero: this was the last reference, free it */
+ kfree(sc);
+ }
+}
+EXPORT_SYMBOL(ceph_put_snap_context);
diff --git a/net/compat.c b/net/compat.c
index 79ae88485001..f0a1ba6c8086 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -734,19 +734,25 @@ static unsigned char nas[21] = {
asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
{
- return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+ return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
}
asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags)
{
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT);
}
asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
{
- return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+ return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
}
asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned int flags)
@@ -768,6 +774,9 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
int datagrams;
struct timespec ktspec;
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+
if (COMPAT_USE_64BIT_TIME)
return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3f9dc6..b71423db7785 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn
return autoremove_wake_function(wait, mode, sync, key);
}
/*
- * Wait for a packet..
+ * Wait for the last received packet to be different from skb
*/
-static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+ const struct sk_buff *skb)
{
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
if (error)
goto out_err;
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (sk->sk_receive_queue.prev != skb)
goto out;
/* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
* __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
+ * @peeked: returns non-zero if this packet has been seen before
* @off: an offset in bytes to peek skb from. Returns an offset
* within an skb where data actually starts
- * @peeked: returns non-zero if this packet has been seen before
* @err: error code returned
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -161,7 +162,7 @@ out_noerr:
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
int *peeked, int *off, int *err)
{
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -182,13 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
*/
unsigned long cpu_flags;
struct sk_buff_head *queue = &sk->sk_receive_queue;
+ int _off = *off;
+ last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
skb_queue_walk(queue, skb) {
+ last = skb;
*peeked = skb->peeked;
if (flags & MSG_PEEK) {
- if (*off >= skb->len && skb->len) {
- *off -= skb->len;
+ if (_off >= skb->len && (skb->len || _off ||
+ skb->peeked)) {
+ _off -= skb->len;
continue;
}
skb->peeked = 1;
@@ -197,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
__skb_unlink(skb, queue);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ *off = _off;
return skb;
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
@@ -206,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
if (!timeo)
goto no_packet;
- } while (!wait_for_packet(sk, err, &timeo));
+ } while (!wait_for_more_packets(sk, err, &timeo, last));
return NULL;
@@ -749,7 +755,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/core/dev.c b/net/core/dev.c
index b24ab0e98eb4..faebb398fb46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -200,7 +200,7 @@ static inline void rps_unlock(struct softnet_data *sd)
}
/* Device list insertion */
-static int list_netdevice(struct net_device *dev)
+static void list_netdevice(struct net_device *dev)
{
struct net *net = dev_net(dev);
@@ -214,8 +214,6 @@ static int list_netdevice(struct net_device *dev)
write_unlock_bh(&dev_base_lock);
dev_base_seq_inc(net);
-
- return 0;
}
/* Device list removal
@@ -794,6 +792,40 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL(dev_get_by_index);
/**
+ * netdev_get_name - get a netdevice name, knowing its ifindex.
+ * @net: network namespace
+ * @name: a pointer to the buffer where the name will be stored.
+ * @ifindex: the ifindex of the interface to get the name from.
+ *
+ * The use of raw_seqcount_begin() and cond_resched() before
+ * retrying is required as we want to give the writers a chance
+ * to complete when CONFIG_PREEMPT is not set.
+ */
+int netdev_get_name(struct net *net, char *name, int ifindex)
+{
+ struct net_device *dev;
+ unsigned int seq;
+
+retry:
+ seq = raw_seqcount_begin(&devnet_rename_seq);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+
+ strcpy(name, dev->name);
+ rcu_read_unlock();
+ if (read_seqcount_retry(&devnet_rename_seq, seq)) {
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+/**
* dev_getbyhwaddr_rcu - find a device by its hardware address
* @net: the applicable net namespace
* @type: media type of device
@@ -2210,30 +2242,51 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
-/**
- * skb_mac_gso_segment - mac layer segmentation handler.
- * @skb: buffer to segment
- * @features: features for the output path (see dev->features)
- */
-struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
+__be16 skb_network_protocol(struct sk_buff *skb)
{
- struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_offload *ptype;
__be16 type = skb->protocol;
int vlan_depth = ETH_HLEN;
- while (type == htons(ETH_P_8021Q)) {
+ /* Tunnel gso handlers can set protocol to ethernet. */
+ if (type == htons(ETH_P_TEB)) {
+ struct ethhdr *eth;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
+ return 0;
+
+ eth = (struct ethhdr *)skb_mac_header(skb);
+ type = eth->h_proto;
+ }
+
+ while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
struct vlan_hdr *vh;
if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
- return ERR_PTR(-EINVAL);
+ return 0;
vh = (struct vlan_hdr *)(skb->data + vlan_depth);
type = vh->h_vlan_encapsulated_proto;
vlan_depth += VLAN_HLEN;
}
+ return type;
+}
+
+/**
+ * skb_mac_gso_segment - mac layer segmentation handler.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ */
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_offload *ptype;
+ __be16 type = skb_network_protocol(skb);
+
+ if (unlikely(!type))
+ return ERR_PTR(-EINVAL);
+
__skb_pull(skb, skb->mac_len);
rcu_read_lock();
@@ -2400,24 +2453,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
return 0;
}
-static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
-{
- return ((features & NETIF_F_GEN_CSUM) ||
- ((features & NETIF_F_V4_CSUM) &&
- protocol == htons(ETH_P_IP)) ||
- ((features & NETIF_F_V6_CSUM) &&
- protocol == htons(ETH_P_IPV6)) ||
- ((features & NETIF_F_FCOE_CRC) &&
- protocol == htons(ETH_P_FCOE)));
-}
-
static netdev_features_t harmonize_features(struct sk_buff *skb,
__be16 protocol, netdev_features_t features)
{
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
- features &= ~NETIF_F_SG;
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
}
@@ -2433,20 +2474,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
- if (protocol == htons(ETH_P_8021Q)) {
+ if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
return harmonize_features(skb, protocol, features);
}
- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
+ features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
- if (protocol != htons(ETH_P_8021Q)) {
+ if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
return harmonize_features(skb, protocol, features);
} else {
features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
- NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
+ NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
return harmonize_features(skb, protocol, features);
}
}
@@ -2458,7 +2501,7 @@ EXPORT_SYMBOL(netif_skb_features);
* 2. skb is fragmented and the device does not support SG.
*/
static inline int skb_needs_linearize(struct sk_buff *skb,
- int features)
+ netdev_features_t features)
{
return skb_is_nonlinear(skb) &&
((skb_has_frag_list(skb) &&
@@ -2487,8 +2530,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
features = netif_skb_features(skb);
if (vlan_tx_tag_present(skb) &&
- !(features & NETIF_F_HW_VLAN_TX)) {
- skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ !vlan_hw_offload_capable(features, skb->vlan_proto)) {
+ skb = __vlan_put_tag(skb, skb->vlan_proto,
+ vlan_tx_tag_get(skb));
if (unlikely(!skb))
goto out;
@@ -2547,13 +2591,6 @@ gso:
skb->next = nskb->next;
nskb->next = NULL;
- /*
- * If device doesn't need nskb->dst, release it right now while
- * its hot in this cpu cache
- */
- if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
- skb_dst_drop(nskb);
-
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(nskb, dev);
@@ -2573,8 +2610,11 @@ gso:
} while (skb->next);
out_kfree_gso_skb:
- if (likely(skb->next == NULL))
+ if (likely(skb->next == NULL)) {
skb->destructor = DEV_GSO_CB(skb)->destructor;
+ consume_skb(skb);
+ return rc;
+ }
out_kfree_skb:
kfree_skb(skb);
out:
@@ -2592,6 +2632,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
*/
if (shinfo->gso_size) {
unsigned int hdr_len;
+ u16 gso_segs = shinfo->gso_segs;
/* mac layer + network layer */
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
@@ -2601,7 +2642,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len += tcp_hdrlen(skb);
else
hdr_len += sizeof(struct udphdr);
- qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
+
+ if (shinfo->gso_type & SKB_GSO_DODGY)
+ gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
+ shinfo->gso_size);
+
+ qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
}
}
@@ -3329,7 +3375,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
* netdev_rx_handler_unregister - unregister receive handler
* @dev: device to unregister a handler from
*
- * Unregister a receive hander from a device.
+ * Unregister a receive handler from a device.
*
* The caller must hold the rtnl_mutex.
*/
@@ -3358,6 +3404,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
case __constant_htons(ETH_P_IP):
case __constant_htons(ETH_P_IPV6):
case __constant_htons(ETH_P_8021Q):
+ case __constant_htons(ETH_P_8021AD):
return true;
default:
return false;
@@ -3398,7 +3445,8 @@ another_round:
__this_cpu_inc(softnet_data.processed);
- if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+ if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
+ skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
skb = vlan_untag(skb);
if (unlikely(!skb))
goto unlock;
@@ -4066,6 +4114,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->gro_list = NULL;
napi->skb = NULL;
napi->poll = poll;
+ if (weight > NAPI_POLL_WEIGHT)
+ pr_err_once("netif_napi_add() called with weight %d on device %s\n",
+ weight, dev->name);
napi->weight = weight;
list_add(&napi->dev_list, &dev->napi_list);
napi->dev = dev;
@@ -4927,20 +4978,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
}
- /* Fix illegal SG+CSUM combinations. */
- if ((features & NETIF_F_SG) &&
- !(features & NETIF_F_ALL_CSUM)) {
- netdev_dbg(dev,
- "Dropping NETIF_F_SG since no checksum feature.\n");
- features &= ~NETIF_F_SG;
- }
-
/* TSO requires that SG is present as well. */
if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
features &= ~NETIF_F_ALL_TSO;
}
+ if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
+ !(features & NETIF_F_IP_CSUM)) {
+ netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
+ features &= ~NETIF_F_TSO;
+ features &= ~NETIF_F_TSO_ECN;
+ }
+
+ if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
+ !(features & NETIF_F_IPV6_CSUM)) {
+ netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
+ features &= ~NETIF_F_TSO6;
+ }
+
/* TSO ECN requires that TSO is present as well. */
if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
features &= ~NETIF_F_TSO_ECN;
@@ -5171,7 +5227,8 @@ int register_netdevice(struct net_device *dev)
}
}
- if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) &&
+ if (((dev->hw_features | dev->features) &
+ NETIF_F_HW_VLAN_CTAG_FILTER) &&
(!dev->netdev_ops->ndo_vlan_rx_add_vid ||
!dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
@@ -5208,6 +5265,10 @@ int register_netdevice(struct net_device *dev)
*/
dev->vlan_features |= NETIF_F_HIGHDMA;
+ /* Make NETIF_F_SG inheritable to tunnel devices.
+ */
+ dev->hw_enc_features |= NETIF_F_SG;
+
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index abdc9e6ef33e..6cda4e2c2132 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,8 @@
static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global)
+ unsigned char addr_type, bool global,
+ bool sync)
{
struct netdev_hw_addr *ha;
int alloc_size;
@@ -37,7 +38,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
ha->type = addr_type;
ha->refcount = 1;
ha->global_use = global;
- ha->synced = 0;
+ ha->synced = sync;
+ ha->sync_cnt = 0;
list_add_tail_rcu(&ha->list, &list->list);
list->count++;
@@ -46,7 +48,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global)
+ unsigned char addr_type, bool global, bool sync)
{
struct netdev_hw_addr *ha;
@@ -63,43 +65,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
else
ha->global_use = true;
}
+ if (sync) {
+ if (ha->synced)
+ return -EEXIST;
+ else
+ ha->synced = true;
+ }
ha->refcount++;
return 0;
}
}
- return __hw_addr_create_ex(list, addr, addr_len, addr_type, global);
+ return __hw_addr_create_ex(list, addr, addr_len, addr_type, global,
+ sync);
}
static int __hw_addr_add(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type)
{
- return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
+ return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false);
+}
+
+static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
+ struct netdev_hw_addr *ha, bool global,
+ bool sync)
+{
+ if (global && !ha->global_use)
+ return -ENOENT;
+
+ if (sync && !ha->synced)
+ return -ENOENT;
+
+ if (global)
+ ha->global_use = false;
+
+ if (sync)
+ ha->synced = false;
+
+ if (--ha->refcount)
+ return 0;
+ list_del_rcu(&ha->list);
+ kfree_rcu(ha, rcu_head);
+ list->count--;
+ return 0;
}
static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global)
+ unsigned char addr_type, bool global, bool sync)
{
struct netdev_hw_addr *ha;
list_for_each_entry(ha, &list->list, list) {
if (!memcmp(ha->addr, addr, addr_len) &&
- (ha->type == addr_type || !addr_type)) {
- if (global) {
- if (!ha->global_use)
- break;
- else
- ha->global_use = false;
- }
- if (--ha->refcount)
- return 0;
- list_del_rcu(&ha->list);
- kfree_rcu(ha, rcu_head);
- list->count--;
- return 0;
- }
+ (ha->type == addr_type || !addr_type))
+ return __hw_addr_del_entry(list, ha, global, sync);
}
return -ENOENT;
}
@@ -108,7 +129,61 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type)
{
- return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
+ return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false);
+}
+
+static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr *ha,
+ int addr_len)
+{
+ int err;
+
+ err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
+ false, true);
+ if (err && err != -EEXIST)
+ return err;
+
+ if (!err) {
+ ha->sync_cnt++;
+ ha->refcount++;
+ }
+
+ return 0;
+}
+
+static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ struct netdev_hw_addr *ha,
+ int addr_len)
+{
+ int err;
+
+ err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type,
+ false, true);
+ if (err)
+ return;
+ ha->sync_cnt--;
+ /* address on from list is not marked synced */
+ __hw_addr_del_entry(from_list, ha, false, false);
+}
+
+static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ int addr_len)
+{
+ int err = 0;
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+ if (ha->sync_cnt == ha->refcount) {
+ __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
+ } else {
+ err = __hw_addr_sync_one(to_list, ha, addr_len);
+ if (err)
+ break;
+ }
+ }
+ return err;
}
int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
@@ -152,6 +227,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
}
EXPORT_SYMBOL(__hw_addr_del_multiple);
+/* This function only works where there is a strict 1-1 relationship
+ * between source and destination of the sync. If you ever need to
+ * sync addresses to more than 1 destination, you need to use
+ * __hw_addr_sync_multiple().
+ */
int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr_list *from_list,
int addr_len)
@@ -160,17 +240,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr *ha, *tmp;
list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (!ha->synced) {
- err = __hw_addr_add(to_list, ha->addr,
- addr_len, ha->type);
+ if (!ha->sync_cnt) {
+ err = __hw_addr_sync_one(to_list, ha, addr_len);
if (err)
break;
- ha->synced++;
- ha->refcount++;
- } else if (ha->refcount == 1) {
- __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
- __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
- }
+ } else if (ha->refcount == 1)
+ __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
}
return err;
}
@@ -183,13 +258,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr *ha, *tmp;
list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (ha->synced) {
- __hw_addr_del(to_list, ha->addr,
- addr_len, ha->type);
- ha->synced--;
- __hw_addr_del(from_list, ha->addr,
- addr_len, ha->type);
- }
+ if (ha->sync_cnt)
+ __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
}
}
EXPORT_SYMBOL(__hw_addr_unsync);
@@ -406,7 +476,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
}
}
err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST, true);
+ NETDEV_HW_ADDR_T_UNICAST, true, false);
if (!err)
__dev_set_rx_mode(dev);
out:
@@ -469,7 +539,8 @@ EXPORT_SYMBOL(dev_uc_del);
* locked by netif_addr_lock_bh.
*
* This function is intended to be called from the dev->set_rx_mode
- * function of layered software devices.
+ * function of layered software devices. This function assumes that
+ * addresses will only ever be synced to the @to device and no other.
*/
int dev_uc_sync(struct net_device *to, struct net_device *from)
{
@@ -488,6 +559,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
EXPORT_SYMBOL(dev_uc_sync);
/**
+ * dev_uc_sync_multiple - Synchronize device's unicast list to another
+ * device, but allow for multiple calls to sync to multiple devices.
+ * @to: destination device
+ * @from: source device
+ *
+ * Add newly added addresses to the destination device and release
+ * addresses that have been deleted from the source. The source device
+ * must be locked by netif_addr_lock_bh.
+ *
+ * This function is intended to be called from the dev->set_rx_mode
+ * function of layered software devices. It allows for a single source
+ * device to be synced to multiple destination devices.
+ */
+int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
+{
+ int err = 0;
+
+ if (to->addr_len != from->addr_len)
+ return -EINVAL;
+
+ netif_addr_lock_nested(to);
+ err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ return err;
+}
+EXPORT_SYMBOL(dev_uc_sync_multiple);
+
+/**
* dev_uc_unsync - Remove synchronized addresses from the destination device
* @to: destination device
* @from: source device
@@ -559,7 +660,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
}
}
err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, true);
+ NETDEV_HW_ADDR_T_MULTICAST, true, false);
if (!err)
__dev_set_rx_mode(dev);
out:
@@ -575,7 +676,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
netif_addr_lock_bh(dev);
err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, global);
+ NETDEV_HW_ADDR_T_MULTICAST, global, false);
if (!err)
__dev_set_rx_mode(dev);
netif_addr_unlock_bh(dev);
@@ -615,7 +716,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
netif_addr_lock_bh(dev);
err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, global);
+ NETDEV_HW_ADDR_T_MULTICAST, global, false);
if (!err)
__dev_set_rx_mode(dev);
netif_addr_unlock_bh(dev);
@@ -679,6 +780,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
EXPORT_SYMBOL(dev_mc_sync);
/**
+ * dev_mc_sync_multiple - Synchronize device's multicast list to another
+ * device, but allow for multiple calls to sync to multiple devices.
+ * @to: destination device
+ * @from: source device
+ *
+ * Add newly added addresses to the destination device and release
+ * addresses that have no users left. The source device must be
+ * locked by netif_addr_lock_bh.
+ *
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of layered software devices. It allows for a single
+ * source device to be synced to multiple destination devices.
+ */
+int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
+{
+ int err = 0;
+
+ if (to->addr_len != from->addr_len)
+ return -EINVAL;
+
+ netif_addr_lock_nested(to);
+ err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ return err;
+}
+EXPORT_SYMBOL(dev_mc_sync_multiple);
+
+/**
* dev_mc_unsync - Remove synchronized addresses from the destination device
* @to: destination device
* @from: source device
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 6cc0481faade..5b7d0e1d0664 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -19,9 +19,8 @@
static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
- struct net_device *dev;
struct ifreq ifr;
- unsigned seq;
+ int error;
/*
* Fetch the caller's info block.
@@ -30,19 +29,9 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
-retry:
- seq = read_seqcount_begin(&devnet_rename_seq);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
- if (!dev) {
- rcu_read_unlock();
- return -ENODEV;
- }
-
- strcpy(ifr.ifr_name, dev->name);
- rcu_read_unlock();
- if (read_seqcount_retry(&devnet_rename_seq, seq))
- goto retry;
+ error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
+ if (error)
+ return error;
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
diff --git a/net/core/dst.c b/net/core/dst.c
index 35fd12f1a69c..df9cc810ec8e 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
/**
- * skb_dst_set_noref - sets skb dst, without a reference
+ * __skb_dst_set_noref - sets skb dst, without a reference
* @skb: buffer
* @dst: dst entry
+ * @force: if force is set, use noref version even for DST_NOCACHE entries
*
* Sets skb dst, assuming a reference was not taken on dst
* skb_dst_drop() should not dst_release() this dst
*/
-void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)
{
WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
/* If dst not in cache, we must take a reference, because
* dst_release() will destroy dst as soon as its refcount becomes zero
*/
- if (unlikely(dst->flags & DST_NOCACHE)) {
+ if (unlikely((dst->flags & DST_NOCACHE) && !force)) {
dst_hold(dst);
skb_dst_set(skb, dst);
} else {
skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}
}
-EXPORT_SYMBOL(skb_dst_set_noref);
+EXPORT_SYMBOL(__skb_dst_set_noref);
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3e9b2c3e30f0..ce91766eeca9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -60,10 +60,13 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
[NETIF_F_HIGHDMA_BIT] = "highdma",
[NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
- [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert",
+ [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
- [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse",
- [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter",
+ [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
+ [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
+ [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
+ [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
+ [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
[NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
[NETIF_F_GSO_BIT] = "tx-generic-segmentation",
[NETIF_F_LLTX_BIT] = "tx-lockless",
@@ -78,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
[NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -266,18 +270,19 @@ static int ethtool_set_one_feature(struct net_device *dev,
#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
-#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \
- NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH)
+#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \
+ NETIF_F_RXHASH)
static u32 __ethtool_get_flags(struct net_device *dev)
{
u32 flags = 0;
- if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
- if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN;
- if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN;
- if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
- if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
+ if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN;
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN;
+ if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
+ if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
return flags;
}
@@ -290,8 +295,8 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
return -EINVAL;
if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO;
- if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX;
- if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX;
+ if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX;
+ if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX;
if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE;
if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH;
@@ -1416,7 +1421,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
- u32 old_features;
+ netdev_features_t old_features;
if (!dev || !netif_device_present(dev))
return -ENODEV;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 58a4ba27dfe3..d5a9f8ead0d8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -266,7 +266,7 @@ errout:
return err;
}
-static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -415,7 +415,7 @@ errout:
return err;
}
-static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e20b55a7830..6438f29ff266 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -348,6 +348,9 @@ load_b:
case BPF_S_ANC_VLAN_TAG_PRESENT:
A = !!vlan_tx_tag_present(skb);
continue;
+ case BPF_S_ANC_PAY_OFFSET:
+ A = __skb_get_poff(skb);
+ continue;
case BPF_S_ANC_NLATTR: {
struct nlattr *nla;
@@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
ANCILLARY(ALU_XOR_X);
ANCILLARY(VLAN_TAG);
ANCILLARY(VLAN_TAG_PRESENT);
+ ANCILLARY(PAY_OFFSET);
}
/* ancillary operation unknown or unsupported */
@@ -774,7 +778,7 @@ int sk_detach_filter(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
-static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
+void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
{
static const u16 decodes[] = {
[BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
@@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
[BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
[BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
diff --git a/net/core/flow.c b/net/core/flow.c
index 2bfd081c59f7..7102f166482d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -323,6 +323,24 @@ static void flow_cache_flush_tasklet(unsigned long data)
complete(&info->completion);
}
+/*
+ * Return whether a cpu needs flushing. Conservatively, we assume
+ * the presence of any entries means the core may require flushing,
+ * since the flow_cache_ops.check() function may assume it's running
+ * on the same core as the per-cpu cache component.
+ */
+static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
+{
+ struct flow_cache_percpu *fcp;
+ int i;
+
+ fcp = per_cpu_ptr(fc->percpu, cpu);
+ for (i = 0; i < flow_cache_hash_size(fc); i++)
+ if (!hlist_empty(&fcp->hash_table[i]))
+ return 0;
+ return 1;
+}
+
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
@@ -337,22 +355,40 @@ void flow_cache_flush(void)
{
struct flow_flush_info info;
static DEFINE_MUTEX(flow_flush_sem);
+ cpumask_var_t mask;
+ int i, self;
+
+ /* Track which cpus need flushing to avoid disturbing all cores. */
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return;
+ cpumask_clear(mask);
/* Don't want cpus going down or up during this. */
get_online_cpus();
mutex_lock(&flow_flush_sem);
info.cache = &flow_cache_global;
- atomic_set(&info.cpuleft, num_online_cpus());
+ for_each_online_cpu(i)
+ if (!flow_cache_percpu_empty(info.cache, i))
+ cpumask_set_cpu(i, mask);
+ atomic_set(&info.cpuleft, cpumask_weight(mask));
+ if (atomic_read(&info.cpuleft) == 0)
+ goto done;
+
init_completion(&info.completion);
local_bh_disable();
- smp_call_function(flow_cache_flush_per_cpu, &info, 0);
- flow_cache_flush_tasklet((unsigned long)&info);
+ self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
+ on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
+ if (self)
+ flow_cache_flush_tasklet((unsigned long)&info);
local_bh_enable();
wait_for_completion(&info.completion);
+
+done:
mutex_unlock(&flow_flush_sem);
put_online_cpus();
+ free_cpumask_var(mask);
}
static void flow_cache_flush_task(struct work_struct *work)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index e187bf06d673..00ee068efc1c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,6 +5,10 @@
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <linux/igmp.h>
+#include <linux/icmp.h>
+#include <linux/sctp.h>
+#include <linux/dccp.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
@@ -119,6 +123,17 @@ ipv6:
nhoff += 4;
if (hdr->flags & GRE_SEQ)
nhoff += 4;
+ if (proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = skb_header_pointer(skb, nhoff,
+ sizeof(_eth), &_eth);
+ if (!eth)
+ return false;
+ proto = eth->h_proto;
+ nhoff += sizeof(*eth);
+ }
goto again;
}
break;
@@ -217,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
}
EXPORT_SYMBOL(__skb_tx_hash);
+/* __skb_get_poff() returns the offset to the payload as far as it could
+ * be dissected. The main user is currently BPF, so that we can dynamically
+ * truncate packets without needing to push actual payload to the user
+ * space and can analyze headers only, instead.
+ */
+u32 __skb_get_poff(const struct sk_buff *skb)
+{
+ struct flow_keys keys;
+ u32 poff = 0;
+
+ if (!skb_flow_dissect(skb, &keys))
+ return 0;
+
+ poff += keys.thoff;
+ switch (keys.ip_proto) {
+ case IPPROTO_TCP: {
+ const struct tcphdr *tcph;
+ struct tcphdr _tcph;
+
+ tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph);
+ if (!tcph)
+ return poff;
+
+ poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+ break;
+ }
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ poff += sizeof(struct udphdr);
+ break;
+ /* For the rest, we do not really care about header
+ * extensions at this point for now.
+ */
+ case IPPROTO_ICMP:
+ poff += sizeof(struct icmphdr);
+ break;
+ case IPPROTO_ICMPV6:
+ poff += sizeof(struct icmp6hdr);
+ break;
+ case IPPROTO_IGMP:
+ poff += sizeof(struct igmphdr);
+ break;
+ case IPPROTO_DCCP:
+ poff += sizeof(struct dccp_hdr);
+ break;
+ case IPPROTO_SCTP:
+ poff += sizeof(struct sctphdr);
+ break;
+ }
+
+ return poff;
+}
+
static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
if (unlikely(queue_index >= dev->real_num_tx_queues)) {
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 7e7aeb01de45..de178e462682 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -75,31 +75,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
/*
* Copy kernel to iovec. Returns -EFAULT on error.
- *
- * Note: this modifies the original iovec.
- */
-
-int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
-{
- while (len > 0) {
- if (iov->iov_len) {
- int copy = min_t(unsigned int, iov->iov_len, len);
- if (copy_to_user(iov->iov_base, kdata, copy))
- return -EFAULT;
- kdata += copy;
- len -= copy;
- iov->iov_len -= copy;
- iov->iov_base += copy;
- }
- iov++;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovec);
-
-/*
- * Copy kernel to iovec. Returns -EFAULT on error.
*/
int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
@@ -125,31 +100,6 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
EXPORT_SYMBOL(memcpy_toiovecend);
/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- *
- * Note: this modifies the original iovec.
- */
-
-int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
-{
- while (len > 0) {
- if (iov->iov_len) {
- int copy = min_t(unsigned int, len, iov->iov_len);
- if (copy_from_user(kdata, iov->iov_base, copy))
- return -EFAULT;
- len -= copy;
- kdata += copy;
- iov->iov_base += copy;
- iov->iov_len -= copy;
- }
- iov++;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovec);
-
-/*
* Copy iovec from kernel. Returns -EFAULT on error.
*/
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3863b8f639c5..5c56b217b999 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -39,21 +39,13 @@
#include <linux/string.h>
#include <linux/log2.h>
+#define DEBUG
#define NEIGH_DEBUG 1
-
-#define NEIGH_PRINTK(x...) printk(x)
-#define NEIGH_NOPRINTK(x...) do { ; } while(0)
-#define NEIGH_PRINTK1 NEIGH_NOPRINTK
-#define NEIGH_PRINTK2 NEIGH_NOPRINTK
-
-#if NEIGH_DEBUG >= 1
-#undef NEIGH_PRINTK1
-#define NEIGH_PRINTK1 NEIGH_PRINTK
-#endif
-#if NEIGH_DEBUG >= 2
-#undef NEIGH_PRINTK2
-#define NEIGH_PRINTK2 NEIGH_PRINTK
-#endif
+/* Emit a pr_debug() message when @level does not exceed NEIGH_DEBUG. */
+#define neigh_dbg(level, fmt, ...)		\
+do {						\
+	if ((level) <= NEIGH_DEBUG)		\
+		pr_debug(fmt, ##__VA_ARGS__);	\
+} while (0)
#define PNEIGH_HASHMASK 0xF
@@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
n->nud_state = NUD_NOARP;
else
n->nud_state = NUD_NONE;
- NEIGH_PRINTK2("neigh %p is stray.\n", n);
+ neigh_dbg(2, "neigh %p is stray\n", n);
}
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -542,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
lockdep_is_held(&tbl->lock)));
rcu_assign_pointer(nht->hash_buckets[hash_val], n);
write_unlock_bh(&tbl->lock);
- NEIGH_PRINTK2("neigh %p is created.\n", n);
+ neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
out:
return rc;
@@ -725,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh)
dev_put(dev);
neigh_parms_put(neigh->parms);
- NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+ neigh_dbg(2, "neigh %p is destroyed\n", neigh);
atomic_dec(&neigh->tbl->entries);
kfree_rcu(neigh, rcu);
@@ -739,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy);
*/
static void neigh_suspect(struct neighbour *neigh)
{
- NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+ neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->output = neigh->ops->output;
}
@@ -751,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh)
*/
static void neigh_connect(struct neighbour *neigh)
{
- NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+ neigh_dbg(2, "neigh %p is connected\n", neigh);
neigh->output = neigh->ops->connected_output;
}
@@ -852,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)
struct sk_buff *skb;
NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
- NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+ neigh_dbg(2, "neigh %p is failed\n", neigh);
neigh->updated = jiffies;
/* It is very thin place. report_unreachable is very complicated
@@ -904,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg)
if (state & NUD_REACHABLE) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->reachable_time)) {
- NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+ neigh_dbg(2, "neigh %p is still alive\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time;
} else if (time_before_eq(now,
neigh->used + neigh->parms->delay_probe_time)) {
- NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + neigh->parms->delay_probe_time;
} else {
- NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+ neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->nud_state = NUD_STALE;
neigh->updated = jiffies;
neigh_suspect(neigh);
@@ -923,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg)
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->delay_probe_time)) {
- NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
+ neigh_dbg(2, "neigh %p is now reachable\n", neigh);
neigh->nud_state = NUD_REACHABLE;
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
- NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+ neigh_dbg(2, "neigh %p is probed\n", neigh);
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
@@ -997,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
- NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_add_timer(neigh,
@@ -1320,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
out:
return rc;
discard:
- NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
- dst, neigh);
+ neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
@@ -1498,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
}
}
write_unlock_bh(&tbl->lock);
- NEIGH_PRINTK1("neigh_parms_release: not found\n");
+ neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);
@@ -1613,7 +1604,7 @@ int neigh_table_clear(struct neigh_table *tbl)
}
EXPORT_SYMBOL(neigh_table_clear);
-static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -1677,7 +1668,7 @@ out:
return err;
}
-static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -1955,7 +1946,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
};
-static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct neigh_table *tbl;
@@ -2714,7 +2705,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- sf->private = PDE(inode)->data;
+ sf->private = PDE_DATA(inode);
}
return ret;
};
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 3174f1998ee6..569d355fec3e 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -271,7 +271,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-8s %pF\n",
+ seq_printf(seq, " %-8s %pf\n",
pt->dev ? pt->dev->name : "", pt->func);
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7427ab5e27d8..981fed397d1d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,21 +606,11 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
return sprintf(buf, "%lu\n", val);
}
-static void rps_dev_flow_table_release_work(struct work_struct *work)
-{
- struct rps_dev_flow_table *table = container_of(work,
- struct rps_dev_flow_table, free_work);
-
- vfree(table);
-}
-
static void rps_dev_flow_table_release(struct rcu_head *rcu)
{
struct rps_dev_flow_table *table = container_of(rcu,
struct rps_dev_flow_table, rcu);
-
- INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
- schedule_work(&table->free_work);
+ vfree(table);
}
static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 80e271d9e64b..f97652036754 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -10,7 +10,8 @@
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
-#include <linux/proc_fs.h>
+#include <linux/fs.h>
+#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
@@ -336,7 +337,7 @@ EXPORT_SYMBOL_GPL(__put_net);
struct net *get_net_ns_by_fd(int fd)
{
- struct proc_inode *ei;
+ struct proc_ns *ei;
struct file *file;
struct net *net;
@@ -344,7 +345,7 @@ struct net *get_net_ns_by_fd(int fd)
if (IS_ERR(file))
return ERR_CAST(file);
- ei = PROC_I(file_inode(file));
+ ei = get_proc_ns(file_inode(file));
if (ei->ns_ops == &netns_operations)
net = get_net(ei->ns);
else
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fa32899006a2..35a9f0804b6f 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/moduleparam.h>
+#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
@@ -47,7 +48,7 @@ static struct sk_buff_head skb_pool;
static atomic_t trapped;
-static struct srcu_struct netpoll_srcu;
+DEFINE_STATIC_SRCU(netpoll_srcu);
#define USEC_PER_POLL 50
#define NETPOLL_RX_ENABLED 1
@@ -206,17 +207,17 @@ static void netpoll_poll_dev(struct net_device *dev)
* the dev_open/close paths use this to block netpoll activity
* while changing device state
*/
- if (!mutex_trylock(&ni->dev_lock))
+ if (down_trylock(&ni->dev_lock))
return;
if (!netif_running(dev)) {
- mutex_unlock(&ni->dev_lock);
+ up(&ni->dev_lock);
return;
}
ops = dev->netdev_ops;
if (!ops->ndo_poll_controller) {
- mutex_unlock(&ni->dev_lock);
+ up(&ni->dev_lock);
return;
}
@@ -225,7 +226,7 @@ static void netpoll_poll_dev(struct net_device *dev)
poll_napi(dev);
- mutex_unlock(&ni->dev_lock);
+ up(&ni->dev_lock);
if (dev->flags & IFF_SLAVE) {
if (ni) {
@@ -255,7 +256,7 @@ int netpoll_rx_disable(struct net_device *dev)
idx = srcu_read_lock(&netpoll_srcu);
ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
if (ni)
- mutex_lock(&ni->dev_lock);
+ down(&ni->dev_lock);
srcu_read_unlock(&netpoll_srcu, idx);
return 0;
}
@@ -267,7 +268,7 @@ void netpoll_rx_enable(struct net_device *dev)
rcu_read_lock();
ni = rcu_dereference(dev->npinfo);
if (ni)
- mutex_unlock(&ni->dev_lock);
+ up(&ni->dev_lock);
rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);
@@ -383,8 +384,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
if (__netif_tx_trylock(txq)) {
if (!netif_xmit_stopped(txq)) {
if (vlan_tx_tag_present(skb) &&
- !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
- skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ !vlan_hw_offload_capable(netif_skb_features(skb),
+ skb->vlan_proto)) {
+ skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
if (unlikely(!skb))
break;
skb->vlan_tci = 0;
@@ -1046,7 +1048,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
INIT_LIST_HEAD(&npinfo->rx_np);
spin_lock_init(&npinfo->rx_lock);
- mutex_init(&npinfo->dev_lock);
+ sema_init(&npinfo->dev_lock, 1);
skb_queue_head_init(&npinfo->neigh_tx);
skb_queue_head_init(&npinfo->txq);
INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
@@ -1212,7 +1214,6 @@ EXPORT_SYMBOL(netpoll_setup);
static int __init netpoll_init(void)
{
skb_queue_head_init(&skb_pool);
- init_srcu_struct(&netpoll_srcu);
return 0;
}
core_initcall(netpoll_init);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6048fc1da1c2..11f2704c3810 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -508,7 +508,7 @@ out:
static int pgctrl_open(struct inode *inode, struct file *file)
{
- return single_open(file, pgctrl_show, PDE(inode)->data);
+ return single_open(file, pgctrl_show, PDE_DATA(inode));
}
static const struct file_operations pktgen_fops = {
@@ -1685,7 +1685,7 @@ static ssize_t pktgen_if_write(struct file *file,
static int pktgen_if_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_if_show, PDE(inode)->data);
+ return single_open(file, pktgen_if_show, PDE_DATA(inode));
}
static const struct file_operations pktgen_if_fops = {
@@ -1823,7 +1823,7 @@ out:
static int pktgen_thread_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_thread_show, PDE(inode)->data);
+ return single_open(file, pktgen_thread_show, PDE_DATA(inode));
}
static const struct file_operations pktgen_thread_fops = {
@@ -1904,7 +1904,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
if (pkt_dev->odev != dev)
continue;
- remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
+ proc_remove(pkt_dev->entry);
pkt_dev->entry = proc_create_data(dev->name, 0600,
pn->proc_dir,
@@ -2198,7 +2198,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
pkt_dev->curfl = 0; /*reset */
}
} else {
- flow = random32() % pkt_dev->cflows;
+ flow = prandom_u32() % pkt_dev->cflows;
pkt_dev->curfl = flow;
if (pkt_dev->flows[flow].count > pkt_dev->lflow) {
@@ -2246,7 +2246,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
__u16 t;
if (pkt_dev->flags & F_QUEUE_MAP_RND) {
- t = random32() %
+ t = prandom_u32() %
(pkt_dev->queue_map_max -
pkt_dev->queue_map_min + 1)
+ pkt_dev->queue_map_min;
@@ -2278,7 +2278,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACSRC_RND)
- mc = random32() % pkt_dev->src_mac_count;
+ mc = prandom_u32() % pkt_dev->src_mac_count;
else {
mc = pkt_dev->cur_src_mac_offset++;
if (pkt_dev->cur_src_mac_offset >=
@@ -2304,7 +2304,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACDST_RND)
- mc = random32() % pkt_dev->dst_mac_count;
+ mc = prandom_u32() % pkt_dev->dst_mac_count;
else {
mc = pkt_dev->cur_dst_mac_offset++;
@@ -2331,21 +2331,21 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < pkt_dev->nr_labels; i++)
if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
- ((__force __be32)random32() &
+ ((__force __be32)prandom_u32() &
htonl(0x000fffff));
}
if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
- pkt_dev->vlan_id = random32() & (4096-1);
+ pkt_dev->vlan_id = prandom_u32() & (4096 - 1);
}
if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
- pkt_dev->svlan_id = random32() & (4096 - 1);
+ pkt_dev->svlan_id = prandom_u32() & (4096 - 1);
}
if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
if (pkt_dev->flags & F_UDPSRC_RND)
- pkt_dev->cur_udp_src = random32() %
+ pkt_dev->cur_udp_src = prandom_u32() %
(pkt_dev->udp_src_max - pkt_dev->udp_src_min)
+ pkt_dev->udp_src_min;
@@ -2358,7 +2358,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
if (pkt_dev->flags & F_UDPDST_RND) {
- pkt_dev->cur_udp_dst = random32() %
+ pkt_dev->cur_udp_dst = prandom_u32() %
(pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
+ pkt_dev->udp_dst_min;
} else {
@@ -2375,7 +2375,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (imn < imx) {
__u32 t;
if (pkt_dev->flags & F_IPSRC_RND)
- t = random32() % (imx - imn) + imn;
+ t = prandom_u32() % (imx - imn) + imn;
else {
t = ntohl(pkt_dev->cur_saddr);
t++;
@@ -2396,17 +2396,15 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__be32 s;
if (pkt_dev->flags & F_IPDST_RND) {
- t = random32() % (imx - imn) + imn;
- s = htonl(t);
-
- while (ipv4_is_loopback(s) ||
- ipv4_is_multicast(s) ||
- ipv4_is_lbcast(s) ||
- ipv4_is_zeronet(s) ||
- ipv4_is_local_multicast(s)) {
- t = random32() % (imx - imn) + imn;
+ do {
+ t = prandom_u32() %
+ (imx - imn) + imn;
s = htonl(t);
- }
+ } while (ipv4_is_loopback(s) ||
+ ipv4_is_multicast(s) ||
+ ipv4_is_lbcast(s) ||
+ ipv4_is_zeronet(s) ||
+ ipv4_is_local_multicast(s));
pkt_dev->cur_daddr = s;
} else {
t = ntohl(pkt_dev->cur_daddr);
@@ -2437,7 +2435,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < 4; i++) {
pkt_dev->cur_in6_daddr.s6_addr32[i] =
- (((__force __be32)random32() |
+ (((__force __be32)prandom_u32() |
pkt_dev->min_in6_daddr.s6_addr32[i]) &
pkt_dev->max_in6_daddr.s6_addr32[i]);
}
@@ -2447,7 +2445,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
__u32 t;
if (pkt_dev->flags & F_TXSIZE_RND) {
- t = random32() %
+ t = prandom_u32() %
(pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
+ pkt_dev->min_pkt_size;
} else {
@@ -3576,8 +3574,6 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
static int pktgen_remove_device(struct pktgen_thread *t,
struct pktgen_dev *pkt_dev)
{
- struct pktgen_net *pn = t->net;
-
pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
if (pkt_dev->running) {
@@ -3597,7 +3593,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
_rem_dev_from_if_list(t, pkt_dev);
if (pkt_dev->entry)
- remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
+ proc_remove(pkt_dev->entry);
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 23854b51a259..a08bd2b7fe3f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -517,32 +517,6 @@ out:
return err;
}
-static const int rtm_min[RTM_NR_FAMILIES] =
-{
- [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
- [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
- [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
- [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
- [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
- [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
- [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
- [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)),
- [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
- [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
-};
-
-static const int rta_max[RTM_NR_FAMILIES] =
-{
- [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX,
- [RTM_FAM(RTM_NEWADDR)] = IFA_MAX,
- [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX,
- [RTM_FAM(RTM_NEWRULE)] = FRA_MAX,
- [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX,
- [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
- [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
- [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
-};
-
int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
{
struct sock *rtnl = net->rtnl;
@@ -1539,7 +1513,7 @@ errout:
return err;
}
-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -1580,7 +1554,7 @@ errout:
return err;
}
-static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
@@ -1711,7 +1685,7 @@ static int rtnl_group_changelink(struct net *net, int group,
return 0;
}
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
@@ -1866,7 +1840,7 @@ out:
}
}
-static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -1957,8 +1931,11 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
if (rtnl_msg_handlers[idx] == NULL ||
rtnl_msg_handlers[idx][type].dumpit == NULL)
continue;
- if (idx > s_idx)
+ if (idx > s_idx) {
memset(&cb->args[0], 0, sizeof(cb->args));
+ cb->prev_seq = 0;
+ cb->seq = 0;
+ }
if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
break;
}
@@ -2051,7 +2028,39 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
-static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+/**
+ * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry
+ * @ndm: neighbour message header; only ndm_state is examined here
+ * @tb: netlink attributes (not used by this default handler)
+ * @dev: device whose address list receives the entry
+ * @addr: link-layer address to add
+ * @flags: netlink message flags; NLM_F_EXCL makes a duplicate an error
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ndo_dflt_fdb_add(struct ndmsg *ndm,
+		     struct nlattr *tb[],
+		     struct net_device *dev,
+		     const unsigned char *addr,
+		     u16 flags)
+{
+	int err;
+
+	/* If aging addresses are supported device will need to
+	 * implement its own handler for this.
+	 */
+	if (ndm->ndm_state != 0 && (ndm->ndm_state & NUD_PERMANENT) == 0) {
+		pr_info("%s: FDB only supports static addresses\n", dev->name);
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+		err = dev_uc_add_excl(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_add_excl(dev, addr);
+	else
+		err = -EINVAL;
+
+	/* Only return duplicate errors if NLM_F_EXCL is set */
+	if (err == -EEXIST && (flags & NLM_F_EXCL) == 0)
+		return 0;
+
+	return err;
+}
+EXPORT_SYMBOL(ndo_dflt_fdb_add);
+
+static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -2082,7 +2091,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
addr = nla_data(tb[NDA_LLADDR]);
- if (!is_valid_ether_addr(addr)) {
+ if (is_zero_ether_addr(addr)) {
pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
return -EINVAL;
}
@@ -2103,10 +2112,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
/* Embedded bridge, macvlan, and any other device support */
- if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) {
- err = dev->netdev_ops->ndo_fdb_add(ndm, tb,
- dev, addr,
- nlh->nlmsg_flags);
+ if ((ndm->ndm_flags & NTF_SELF)) {
+ if (dev->netdev_ops->ndo_fdb_add)
+ err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
+ nlh->nlmsg_flags);
+ else
+ err = ndo_dflt_fdb_add(ndm, tb, dev, addr,
+ nlh->nlmsg_flags);
if (!err) {
rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH);
@@ -2117,7 +2129,36 @@ out:
return err;
}
-static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+/**
+ * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry
+ * @ndm: neighbour message header; only ndm_state is examined here
+ * @tb: netlink attributes (not used by this default handler)
+ * @dev: device whose address list loses the entry
+ * @addr: link-layer address to delete
+ *
+ * Returns 0 on success or a negative errno; -EINVAL when the request
+ * names a non-static entry.
+ */
+int ndo_dflt_fdb_del(struct ndmsg *ndm,
+		     struct nlattr *tb[],
+		     struct net_device *dev,
+		     const unsigned char *addr)
+{
+	int err;
+
+	/* If aging addresses are supported device will need to
+	 * implement its own handler for this. Use the same check as
+	 * ndo_dflt_fdb_add(): static (NUD_PERMANENT) entries and an
+	 * unspecified state are handled here, anything else is refused.
+	 */
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		pr_info("%s: FDB only supports static addresses\n", dev->name);
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+		err = dev_uc_del(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_del(dev, addr);
+	else
+		err = -EINVAL;
+
+	return err;
+}
+EXPORT_SYMBOL(ndo_dflt_fdb_del);
+
+static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
@@ -2151,7 +2192,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
addr = nla_data(tb[NDA_LLADDR]);
- if (!is_valid_ether_addr(addr)) {
+ if (is_zero_ether_addr(addr)) {
pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
return -EINVAL;
}
@@ -2174,8 +2215,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
/* Embedded bridge, macvlan, and any other device support */
- if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
- err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
+ if (ndm->ndm_flags & NTF_SELF) {
+ if (dev->netdev_ops->ndo_fdb_del)
+ err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
+ else
+ err = ndo_dflt_fdb_del(ndm, tb, dev, addr);
if (!err) {
rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
@@ -2220,7 +2264,7 @@ skip:
* @dev: netdevice
*
* Default netdevice operation to dump the existing unicast address list.
- * Returns zero on success.
+ * Returns number of addresses from list put in skb.
*/
int ndo_dflt_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
@@ -2260,6 +2304,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (dev->netdev_ops->ndo_fdb_dump)
idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx);
+ else
+ idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
}
rcu_read_unlock();
@@ -2411,8 +2457,7 @@ errout:
return err;
}
-static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- void *arg)
+static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2482,8 +2527,7 @@ out:
return err;
}
-static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
- void *arg)
+static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
@@ -2553,10 +2597,6 @@ out:
return err;
}
-/* Protected by RTNL sempahore. */
-static struct rtattr **rta_buf;
-static int rtattr_max;
-
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -2564,7 +2604,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
rtnl_doit_func doit;
int sz_idx, kind;
- int min_len;
int family;
int type;
int err;
@@ -2576,10 +2615,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
- if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
+ if (nlmsg_len(nlh) < sizeof(struct rtgenmsg))
return 0;
- family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
+ family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
sz_idx = type>>2;
kind = type&3;
@@ -2612,32 +2651,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
}
- memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
-
- min_len = rtm_min[sz_idx];
- if (nlh->nlmsg_len < min_len)
- return -EINVAL;
-
- if (nlh->nlmsg_len > min_len) {
- int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
- struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
-
- while (RTA_OK(attr, attrlen)) {
- unsigned int flavor = attr->rta_type & NLA_TYPE_MASK;
- if (flavor) {
- if (flavor > rta_max[sz_idx])
- return -EINVAL;
- rta_buf[flavor-1] = attr;
- }
- attr = RTA_NEXT(attr, attrlen);
- }
- }
-
doit = rtnl_get_doit(family, type);
if (doit == NULL)
return -EOPNOTSUPP;
- return doit(skb, nlh, (void *)&rta_buf[0]);
+ return doit(skb, nlh);
}
static void rtnetlink_rcv(struct sk_buff *skb)
@@ -2707,16 +2725,6 @@ static struct pernet_operations rtnetlink_net_ops = {
void __init rtnetlink_init(void)
{
- int i;
-
- rtattr_max = 0;
- for (i = 0; i < ARRAY_SIZE(rta_max); i++)
- if (rta_max[i] > rtattr_max)
- rtattr_max = rta_max[i];
- rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
- if (!rta_buf)
- panic("rtnetlink_init: cannot allocate rta_buf\n");
-
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
diff --git a/net/core/scm.c b/net/core/scm.c
index 2dc6cdaaae8a..03795d0147f2 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -187,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
p->creds.uid = uid;
p->creds.gid = gid;
-
- if (!p->cred ||
- !uid_eq(p->cred->euid, uid) ||
- !gid_eq(p->cred->egid, gid)) {
- struct cred *cred;
- err = -ENOMEM;
- cred = prepare_creds();
- if (!cred)
- goto error;
-
- cred->uid = cred->euid = uid;
- cred->gid = cred->egid = gid;
- if (p->cred)
- put_cred(p->cred);
- p->cred = cred;
- }
break;
}
default:
@@ -306,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
/* Bump the usage count and install the file. */
sock = sock_from_file(fp[i], &err);
if (sock) {
- sock_update_netprioidx(sock->sk, current);
- sock_update_classid(sock->sk, current);
+ sock_update_netprioidx(sock->sk);
+ sock_update_classid(sock->sk);
}
fd_install(new_fd, get_file(fp[i]));
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index e61a8bb7fce7..6a2f13cee86a 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -12,12 +12,10 @@
static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
-static int __init net_secret_init(void)
+void net_secret_init(void)
{
get_random_bytes(net_secret, sizeof(net_secret));
- return 0;
}
-late_initcall(net_secret_init);
#ifdef CONFIG_INET
static u32 seq_scale(u32 seq)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 33245ef54c3b..1c1738cc4538 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -179,6 +179,33 @@ out:
*
*/
+/* Allocate only an sk_buff head from skbuff_head_cache on @node; the
+ * result has head == NULL and truesize == sizeof(struct sk_buff).
+ * Returns NULL when the allocation fails.
+ */
+struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
+{
+	/* Get the HEAD */
+	struct sk_buff *skb = kmem_cache_alloc_node(skbuff_head_cache,
+						    gfp_mask & ~__GFP_DMA,
+						    node);
+
+	if (!skb)
+		return NULL;
+
+	/*
+	 * Only clear those fields we need to clear, not those that we will
+	 * actually initialise below. Hence, don't put any more fields after
+	 * the tail pointer in struct sk_buff!
+	 */
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	skb->head = NULL;
+	skb->truesize = sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->mac_header = ~0U;
+#endif
+	return skb;
+}
+
/**
* __alloc_skb - allocate a network buffer
* @size: size to allocate
@@ -456,15 +483,8 @@ EXPORT_SYMBOL(skb_add_rx_frag);
static void skb_drop_list(struct sk_buff **listp)
{
- struct sk_buff *list = *listp;
-
+ kfree_skb_list(*listp);
*listp = NULL;
-
- do {
- struct sk_buff *this = list;
- list = list->next;
- kfree_skb(this);
- } while (list);
}
static inline void skb_drop_fraglist(struct sk_buff *skb)
@@ -584,7 +604,8 @@ static void skb_release_head_state(struct sk_buff *skb)
static void skb_release_all(struct sk_buff *skb)
{
skb_release_head_state(skb);
- skb_release_data(skb);
+ if (likely(skb->head))
+ skb_release_data(skb);
}
/**
@@ -623,6 +644,17 @@ void kfree_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(kfree_skb);
+/* Call kfree_skb() on every sk_buff of the ->next linked list that
+ * starts at @segs. A NULL @segs is a no-op.
+ */
+void kfree_skb_list(struct sk_buff *segs)
+{
+	struct sk_buff *next;
+
+	for (; segs; segs = next) {
+		/* Fetch the successor before kfree_skb() gets the current one. */
+		next = segs->next;
+		kfree_skb(segs);
+	}
+}
+EXPORT_SYMBOL(kfree_skb_list);
+
/**
* skb_tx_error - report an sk_buff xmit error
* @skb: buffer that triggered an error
@@ -673,6 +705,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac_header = old->mac_header;
new->inner_transport_header = old->inner_transport_header;
new->inner_network_header = old->inner_network_header;
+ new->inner_mac_header = old->inner_mac_header;
skb_dst_copy(new, old);
new->rxhash = old->rxhash;
new->ooo_okay = old->ooo_okay;
@@ -706,6 +739,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->tc_verd = old->tc_verd;
#endif
#endif
+ new->vlan_proto = old->vlan_proto;
new->vlan_tci = old->vlan_tci;
skb_copy_secmark(new, old);
@@ -867,6 +901,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
}
EXPORT_SYMBOL(skb_clone);
+static void skb_headers_offset_update(struct sk_buff *skb, int off)
+{
+ /* outer and inner {transport,network,mac}_header are relative to skb->head */
+ skb->transport_header += off;
+ skb->network_header += off;
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header += off;
+ skb->inner_transport_header += off;
+ skb->inner_network_header += off;
+ skb->inner_mac_header += off;
+}
+
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
#ifndef NET_SKBUFF_DATA_USES_OFFSET
@@ -879,13 +925,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
__copy_skb_header(new, old);
#ifndef NET_SKBUFF_DATA_USES_OFFSET
- /* {transport,network,mac}_header are relative to skb->head */
- new->transport_header += offset;
- new->network_header += offset;
- if (skb_mac_header_was_set(new))
- new->mac_header += offset;
- new->inner_transport_header += offset;
- new->inner_network_header += offset;
+ skb_headers_offset_update(new, offset);
#endif
skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
@@ -1077,14 +1117,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
#else
skb->end = skb->head + size;
#endif
- /* {transport,network,mac}_header and tail are relative to skb->head */
skb->tail += off;
- skb->transport_header += off;
- skb->network_header += off;
- if (skb_mac_header_was_set(skb))
- skb->mac_header += off;
- skb->inner_transport_header += off;
- skb->inner_network_header += off;
+ skb_headers_offset_update(skb, off);
/* Only adjust this if it actually is csum_start rather than csum */
if (skb->ip_summed == CHECKSUM_PARTIAL)
skb->csum_start += nhead;
@@ -1180,12 +1214,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
if (n->ip_summed == CHECKSUM_PARTIAL)
n->csum_start += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
- n->transport_header += off;
- n->network_header += off;
- if (skb_mac_header_was_set(skb))
- n->mac_header += off;
- n->inner_transport_header += off;
- n->inner_network_header += off;
+ skb_headers_offset_update(n, off);
#endif
return n;
@@ -2741,12 +2770,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
unsigned int tnl_hlen = skb_tnl_header_len(skb);
unsigned int headroom;
unsigned int len;
+ __be16 proto;
+ bool csum;
int sg = !!(features & NETIF_F_SG);
int nfrags = skb_shinfo(skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
int pos;
+ proto = skb_network_protocol(skb);
+ if (unlikely(!proto))
+ return ERR_PTR(-EINVAL);
+
+ csum = !!can_checksum_protocol(features, proto);
__skb_push(skb, doffset);
headroom = skb_headroom(skb);
pos = skb_headlen(skb);
@@ -2884,6 +2920,12 @@ skip_fraglist:
nskb->data_len = len - hsize;
nskb->len += nskb->data_len;
nskb->truesize += nskb->data_len;
+
+ if (!csum) {
+ nskb->csum = skb_checksum(nskb, doffset,
+ nskb->len - doffset, 0);
+ nskb->ip_summed = CHECKSUM_NONE;
+ }
} while ((offset += len) < skb->len);
return segs;
@@ -3289,12 +3331,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
- skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (!skb)
- return;
-
if (hwtstamps) {
- *skb_hwtstamps(skb) =
+ *skb_hwtstamps(orig_skb) =
*hwtstamps;
} else {
/*
@@ -3302,9 +3340,13 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
* so keep the shared tx_flags and only
* store software time stamp
*/
- skb->tstamp = ktime_get_real();
+ orig_skb->tstamp = ktime_get_real();
}
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;
@@ -3361,6 +3403,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_headroom(skb) + start;
skb->csum_offset = off;
+ skb_set_transport_header(skb, start);
return true;
}
EXPORT_SYMBOL_GPL(skb_partial_csum_set);
diff --git a/net/core/sock.c b/net/core/sock.c
index b261a7977746..d6d024cfaaaf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -210,7 +210,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_NFC" , "sk_lock-AF_MAX"
+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -226,7 +226,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_NFC" , "slock-AF_MAX"
+ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -242,7 +242,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_NFC" , "clock-AF_MAX"
+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
};
/*
@@ -571,9 +571,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
struct net *net = sock_net(sk);
- struct net_device *dev;
char devname[IFNAMSIZ];
- unsigned seq;
if (sk->sk_bound_dev_if == 0) {
len = 0;
@@ -584,20 +582,9 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
if (len < IFNAMSIZ)
goto out;
-retry:
- seq = read_seqcount_begin(&devnet_rename_seq);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
- ret = -ENODEV;
- if (!dev) {
- rcu_read_unlock();
+ ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
+ if (ret)
goto out;
- }
-
- strcpy(devname, dev->name);
- rcu_read_unlock();
- if (read_seqcount_retry(&devnet_rename_seq, seq))
- goto retry;
len = strlen(devname) + 1;
@@ -907,6 +894,10 @@ set_rcvbuf:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
+ case SO_SELECT_ERR_QUEUE:
+ sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1160,6 +1151,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
break;
+ case SO_SELECT_ERR_QUEUE:
+ v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
+ break;
+
default:
return -ENOPROTOOPT;
}
@@ -1209,18 +1204,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
#endif
}
-/*
- * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
- * un-modified. Special care is taken when initializing object to zero.
- */
-static inline void sk_prot_clear_nulls(struct sock *sk, int size)
-{
- if (offsetof(struct sock, sk_node.next) != 0)
- memset(sk, 0, offsetof(struct sock, sk_node.next));
- memset(&sk->sk_node.pprev, 0,
- size - offsetof(struct sock, sk_node.pprev));
-}
-
void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
unsigned long nulls1, nulls2;
@@ -1298,13 +1281,12 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
module_put(owner);
}
-#ifdef CONFIG_CGROUPS
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk, struct task_struct *task)
+void sock_update_classid(struct sock *sk)
{
u32 classid;
- classid = task_cls_classid(task);
+ classid = task_cls_classid(current);
if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
@@ -1312,16 +1294,15 @@ EXPORT_SYMBOL(sock_update_classid);
#endif
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
-void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
+void sock_update_netprioidx(struct sock *sk)
{
if (in_interrupt())
return;
- sk->sk_cgrp_prioidx = task_netprioidx(task);
+ sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
-#endif
/**
* sk_alloc - All socket objects are allocated here
@@ -1347,8 +1328,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, get_net(net));
atomic_set(&sk->sk_wmem_alloc, 1);
- sock_update_classid(sk, current);
- sock_update_netprioidx(sk, current);
+ sock_update_classid(sk);
+ sock_update_netprioidx(sk);
}
return sk;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a29e90cf36b7..a0e9cf6379de 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,6 +49,44 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
}
EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
+int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
+ struct sk_buff *skb, int attrtype)
+{
+ struct nlattr *attr;
+ struct sk_filter *filter;
+ unsigned int len;
+ int err = 0;
+
+ if (!ns_capable(user_ns, CAP_NET_ADMIN)) {
+ nla_reserve(skb, attrtype, 0);
+ return 0;
+ }
+
+ rcu_read_lock();
+
+ filter = rcu_dereference(sk->sk_filter);
+ len = filter ? filter->len * sizeof(struct sock_filter) : 0;
+
+ attr = nla_reserve(skb, attrtype, len);
+ if (attr == NULL) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ if (filter) {
+ struct sock_filter *fb = (struct sock_filter *)nla_data(attr);
+ int i;
+
+ for (i = 0; i < filter->len; i++, fb++)
+ sk_decode_filter(&filter->insns[i], fb);
+ }
+
+out:
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(sock_diag_put_filterinfo);
+
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
mutex_lock(&sock_diag_table_mutex);
diff --git a/net/core/utils.c b/net/core/utils.c
index e3487e461939..aa88e23fc87a 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/mm.h>
#include <linux/net.h>
@@ -337,27 +338,3 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
csum_unfold(*sum)));
}
EXPORT_SYMBOL(inet_proto_csum_replace16);
-
-int mac_pton(const char *s, u8 *mac)
-{
- int i;
-
- /* XX:XX:XX:XX:XX:XX */
- if (strlen(s) < 3 * ETH_ALEN - 1)
- return 0;
-
- /* Don't dirty result unless string is valid MAC. */
- for (i = 0; i < ETH_ALEN; i++) {
- if (!strchr("0123456789abcdefABCDEF", s[i * 3]))
- return 0;
- if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1]))
- return 0;
- if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
- return 0;
- }
- for (i = 0; i < ETH_ALEN; i++) {
- mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]);
- }
- return 1;
-}
-EXPORT_SYMBOL(mac_pton);
diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c
index 1d9eb7c60a68..4f72fc40bf02 100644
--- a/net/dcb/dcbevent.c
+++ b/net/dcb/dcbevent.c
@@ -20,6 +20,7 @@
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <linux/export.h>
+#include <net/dcbevent.h>
static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 21291f1abcd6..40d5829ed36a 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1658,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = {
[DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get },
};
-static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4f9f5eb478f1..ebc54fef85a5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
return &rt->dst;
}
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
- struct request_values *rv_unused)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
{
int err = -1;
struct sk_buff *skb;
@@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
dreq->dreq_gss = dreq->dreq_iss;
dreq->dreq_service = service;
- if (dccp_v4_send_response(sk, req, NULL))
+ if (dccp_v4_send_response(sk, req))
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e05981f271e..9c61f9c02fdb 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -213,8 +213,7 @@ out:
}
-static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
- struct request_values *rv_unused)
+static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
{
struct inet6_request_sock *ireq6 = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
dreq->dreq_gss = dreq->dreq_iss;
dreq->dreq_service = service;
- if (dccp_v6_send_response(sk, req, NULL))
+ if (dccp_v6_send_response(sk, req))
goto drop_and_free;
inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c8da116d84a4..7d9197063ebb 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
.len = IFNAMSIZ - 1 },
};
-static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -607,7 +607,7 @@ errout:
return err;
}
-static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index e36614eccc04..57dc159245ec 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi
return NULL;
}
-__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type)
+static int dn_fib_count_nhs(const struct nlattr *attr)
{
- while(RTA_OK(attr,attrlen)) {
- if (attr->rta_type == type)
- return *(__le16*)RTA_DATA(attr);
- attr = RTA_NEXT(attr, attrlen);
- }
-
- return 0;
-}
-
-static int dn_fib_count_nhs(struct rtattr *rta)
-{
- int nhs = 0;
- struct rtnexthop *nhp = RTA_DATA(rta);
- int nhlen = RTA_PAYLOAD(rta);
+ struct rtnexthop *nhp = nla_data(attr);
+ int nhs = 0, nhlen = nla_len(attr);
while(nhlen >= (int)sizeof(struct rtnexthop)) {
if ((nhlen -= nhp->rtnh_len) < 0)
@@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta)
return nhs;
}
-static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
+ const struct rtmsg *r)
{
- struct rtnexthop *nhp = RTA_DATA(rta);
- int nhlen = RTA_PAYLOAD(rta);
+ struct rtnexthop *nhp = nla_data(attr);
+ int nhlen = nla_len(attr);
change_nexthops(fi) {
int attrlen = nhlen - sizeof(struct rtnexthop);
@@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons
nh->nh_weight = nhp->rtnh_hops + 1;
if (attrlen) {
- nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+ struct nlattr *gw_attr;
+
+ gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
+ nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
}
nhp = RTNH_NEXT(nhp);
} endfor_nexthops(fi);
@@ -268,7 +260,8 @@ out:
}
-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp)
+struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[],
+ const struct nlmsghdr *nlh, int *errp)
{
int err;
struct dn_fib_info *fi = NULL;
@@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
goto err_inval;
- if (rta->rta_mp) {
- nhs = dn_fib_count_nhs(rta->rta_mp);
- if (nhs == 0)
- goto err_inval;
- }
+ if (attrs[RTA_MULTIPATH] &&
+ (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
+ goto err_inval;
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
err = -ENOBUFS;
@@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
fi->fib_protocol = r->rtm_protocol;
fi->fib_nhs = nhs;
fi->fib_flags = r->rtm_flags;
- if (rta->rta_priority)
- fi->fib_priority = *rta->rta_priority;
- if (rta->rta_mx) {
- int attrlen = RTA_PAYLOAD(rta->rta_mx);
- struct rtattr *attr = RTA_DATA(rta->rta_mx);
- while(RTA_OK(attr, attrlen)) {
- unsigned int flavour = attr->rta_type;
+ if (attrs[RTA_PRIORITY])
+ fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]);
+
+ if (attrs[RTA_METRICS]) {
+ struct nlattr *attr;
+ int rem;
- if (flavour) {
- if (flavour > RTAX_MAX)
+ nla_for_each_nested(attr, attrs[RTA_METRICS], rem) {
+ int type = nla_type(attr);
+
+ if (type) {
+ if (type > RTAX_MAX || nla_len(attr) < 4)
goto err_inval;
- fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr);
+
+ fi->fib_metrics[type-1] = nla_get_u32(attr);
}
- attr = RTA_NEXT(attr, attrlen);
}
}
- if (rta->rta_prefsrc)
- memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2);
- if (rta->rta_mp) {
- if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+ if (attrs[RTA_PREFSRC])
+ fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]);
+
+ if (attrs[RTA_MULTIPATH]) {
+ if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)
goto failure;
- if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+
+ if (attrs[RTA_OIF] &&
+ fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))
goto err_inval;
- if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2))
+
+ if (attrs[RTA_GATEWAY] &&
+ fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))
goto err_inval;
} else {
struct dn_fib_nh *nh = fi->fib_nh;
- if (rta->rta_oif)
- nh->nh_oif = *rta->rta_oif;
- if (rta->rta_gw)
- memcpy(&nh->nh_gw, rta->rta_gw, 2);
+
+ if (attrs[RTA_OIF])
+ nh->nh_oif = nla_get_u32(attrs[RTA_OIF]);
+
+ if (attrs[RTA_GATEWAY])
+ nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
+
nh->nh_flags = r->rtm_flags;
nh->nh_weight = 1;
}
if (r->rtm_type == RTN_NAT) {
- if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
+ if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])
goto err_inval;
- memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2);
+
+ fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
goto link_it;
}
if (dn_fib_props[r->rtm_type].error) {
- if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+ if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])
goto err_inval;
+
goto link_it;
}
@@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
}
if (fi->fib_prefsrc) {
- if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
- memcmp(&fi->fib_prefsrc, rta->rta_dst, 2))
+ if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] ||
+ fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))
if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
goto err_inval;
}
@@ -486,39 +489,21 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
spin_unlock_bh(&dn_fib_multipath_lock);
}
-
-static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
-{
- int i;
-
- for(i = 1; i <= RTA_MAX; i++) {
- struct rtattr *attr = rta[i-1];
- if (attr) {
- if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
- return -EINVAL;
- if (i != RTA_MULTIPATH && i != RTA_METRICS &&
- i != RTA_TABLE)
- rta[i-1] = (struct rtattr *)RTA_DATA(attr);
- }
- }
-
- return 0;
-}
-
-static inline u32 rtm_get_table(struct rtattr **rta, u8 table)
+static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
{
- if (rta[RTA_TABLE - 1])
- table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]);
+ if (attrs[RTA_TABLE])
+ table = nla_get_u32(attrs[RTA_TABLE]);
return table;
}
-static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct dn_fib_table *tb;
- struct rtattr **rta = arg;
- struct rtmsg *r = NLMSG_DATA(nlh);
+ struct rtmsg *r = nlmsg_data(nlh);
+ struct nlattr *attrs[RTA_MAX+1];
+ int err;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -526,22 +511,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
if (!net_eq(net, &init_net))
return -EINVAL;
- if (dn_fib_check_attr(r, rta))
- return -EINVAL;
+ err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
+ if (err < 0)
+ return err;
- tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0);
- if (tb)
- return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
+ tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0);
+ if (!tb)
+ return -ESRCH;
- return -ESRCH;
+ return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
}
-static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct dn_fib_table *tb;
- struct rtattr **rta = arg;
- struct rtmsg *r = NLMSG_DATA(nlh);
+ struct rtmsg *r = nlmsg_data(nlh);
+ struct nlattr *attrs[RTA_MAX+1];
+ int err;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -549,14 +536,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
if (!net_eq(net, &init_net))
return -EINVAL;
- if (dn_fib_check_attr(r, rta))
- return -EINVAL;
+ err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
+ if (err < 0)
+ return err;
- tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1);
- if (tb)
- return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
+ tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1);
+ if (!tb)
+ return -ENOBUFS;
- return -ENOBUFS;
+ return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));
}
static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
@@ -566,10 +554,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad
struct nlmsghdr nlh;
struct rtmsg rtm;
} req;
- struct dn_kern_rta rta;
+ struct {
+ struct nlattr hdr;
+ __le16 dst;
+ } dst_attr = {
+ .dst = dst,
+ };
+ struct {
+ struct nlattr hdr;
+ __le16 prefsrc;
+ } prefsrc_attr = {
+ .prefsrc = ifa->ifa_local,
+ };
+ struct {
+ struct nlattr hdr;
+ u32 oif;
+ } oif_attr = {
+ .oif = ifa->ifa_dev->dev->ifindex,
+ };
+ struct nlattr *attrs[RTA_MAX+1] = {
+ [RTA_DST] = (struct nlattr *) &dst_attr,
+ [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
+ [RTA_OIF] = (struct nlattr *) &oif_attr,
+ };
memset(&req.rtm, 0, sizeof(req.rtm));
- memset(&rta, 0, sizeof(rta));
if (type == RTN_UNICAST)
tb = dn_fib_get_table(RT_MIN_TABLE, 1);
@@ -591,14 +600,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad
req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
req.rtm.rtm_type = type;
- rta.rta_dst = &dst;
- rta.rta_prefsrc = &ifa->ifa_local;
- rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
-
if (cmd == RTM_NEWROUTE)
- tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+ tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
else
- tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+ tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
}
static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5ac0e153ef83..fe32388ea24f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1613,23 +1613,41 @@ errout:
return -EMSGSIZE;
}
+const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
+ [RTA_DST] = { .type = NLA_U16 },
+ [RTA_SRC] = { .type = NLA_U16 },
+ [RTA_IIF] = { .type = NLA_U32 },
+ [RTA_OIF] = { .type = NLA_U32 },
+ [RTA_GATEWAY] = { .type = NLA_U16 },
+ [RTA_PRIORITY] = { .type = NLA_U32 },
+ [RTA_PREFSRC] = { .type = NLA_U16 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_MULTIPATH] = { .type = NLA_NESTED },
+ [RTA_TABLE] = { .type = NLA_U32 },
+ [RTA_MARK] = { .type = NLA_U32 },
+};
+
/*
* This is called by both endnodes and routers now.
*/
-static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
- struct rtattr **rta = arg;
struct rtmsg *rtm = nlmsg_data(nlh);
struct dn_route *rt = NULL;
struct dn_skb_cb *cb;
int err;
struct sk_buff *skb;
struct flowidn fld;
+ struct nlattr *tb[RTA_MAX+1];
if (!net_eq(net, &init_net))
return -EINVAL;
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy);
+ if (err < 0)
+ return err;
+
memset(&fld, 0, sizeof(fld));
fld.flowidn_proto = DNPROTO_NSP;
@@ -1639,12 +1657,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
skb_reset_mac_header(skb);
cb = DN_SKB_CB(skb);
- if (rta[RTA_SRC-1])
- memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2);
- if (rta[RTA_DST-1])
- memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2);
- if (rta[RTA_IIF-1])
- memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+ if (tb[RTA_SRC])
+ fld.saddr = nla_get_le16(tb[RTA_SRC]);
+
+ if (tb[RTA_DST])
+ fld.daddr = nla_get_le16(tb[RTA_DST]);
+
+ if (tb[RTA_IIF])
+ fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
if (fld.flowidn_iif) {
struct net_device *dev;
@@ -1669,10 +1689,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
if (!err && -rt->dst.error)
err = rt->dst.error;
} else {
- int oif = 0;
- if (rta[RTA_OIF - 1])
- memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
- fld.flowidn_oif = oif;
+ if (tb[RTA_OIF])
+ fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
+
err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
}
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 6c2445bcaba1..86e3807052e9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -19,7 +19,6 @@
#include <linux/sockios.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/netdevice.h>
@@ -224,26 +223,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
}
-static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi)
+static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)
{
struct rtnexthop *nhp;
int nhlen;
- if (rta->rta_priority && *rta->rta_priority != fi->fib_priority)
+ if (attrs[RTA_PRIORITY] &&
+ nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)
return 1;
- if (rta->rta_oif || rta->rta_gw) {
- if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
- (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0))
+ if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) {
+ if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) &&
+ (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) == fi->fib_nh->nh_gw))
return 0;
return 1;
}
- if (rta->rta_mp == NULL)
+ if (!attrs[RTA_MULTIPATH])
return 0;
- nhp = RTA_DATA(rta->rta_mp);
- nhlen = RTA_PAYLOAD(rta->rta_mp);
+ nhp = nla_data(attrs[RTA_MULTIPATH]);
+ nhlen = nla_len(attrs[RTA_MULTIPATH]);
for_nexthops(fi) {
int attrlen = nhlen - sizeof(struct rtnexthop);
@@ -254,7 +254,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
return 1;
if (attrlen) {
- gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+ struct nlattr *gw_attr;
+
+ gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
+ gw = gw_attr ? nla_get_le16(gw_attr) : 0;
if (gw && gw != nh->nh_gw)
return 1;
@@ -488,7 +491,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (!net_eq(net, &init_net))
return 0;
- if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+ if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)
return dn_cache_dump(skb, cb);
@@ -517,7 +520,8 @@ out:
return skb->len;
}
-static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
+ struct nlmsghdr *n, struct netlink_skb_parms *req)
{
struct dn_hash *table = (struct dn_hash *)tb->data;
struct dn_fib_node *new_f, *f, **fp, **del_fp;
@@ -536,15 +540,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct
return -ENOBUFS;
dz_key_0(key);
- if (rta->rta_dst) {
- __le16 dst;
- memcpy(&dst, rta->rta_dst, 2);
+ if (attrs[RTA_DST]) {
+ __le16 dst = nla_get_le16(attrs[RTA_DST]);
if (dst & ~DZ_MASK(dz))
return -EINVAL;
key = dz_key(dst, dz);
}
- if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL)
+ if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)
return err;
if (dz->dz_nent > (dz->dz_divisor << 2) &&
@@ -654,7 +657,8 @@ out:
}
-static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
+ struct nlmsghdr *n, struct netlink_skb_parms *req)
{
struct dn_hash *table = (struct dn_hash*)tb->data;
struct dn_fib_node **fp, **del_fp, *f;
@@ -671,9 +675,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct
return -ESRCH;
dz_key_0(key);
- if (rta->rta_dst) {
- __le16 dst;
- memcpy(&dst, rta->rta_dst, 2);
+ if (attrs[RTA_DST]) {
+ __le16 dst = nla_get_le16(attrs[RTA_DST]);
if (dst & ~DZ_MASK(dz))
return -EINVAL;
key = dz_key(dst, dz);
@@ -703,7 +706,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct
(r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
(!r->rtm_protocol ||
fi->fib_protocol == r->rtm_protocol) &&
- dn_fib_nh_match(r, n, rta, fi) == 0)
+ dn_fib_nh_match(r, n, attrs, fi) == 0)
del_fp = fp;
}
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index dfe42012a044..2a7efe388344 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -19,7 +19,7 @@
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/spinlock.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netfilter_decnet.h>
#include <net/sock.h>
@@ -39,21 +39,21 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
unsigned char *ptr;
struct nf_dn_rtmsg *rtm;
- size = NLMSG_SPACE(rt_skb->len);
- size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
- skb = alloc_skb(size, GFP_ATOMIC);
+ size = NLMSG_ALIGN(rt_skb->len) +
+ NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
+ skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb) {
*errp = -ENOMEM;
return NULL;
}
old_tail = skb->tail;
- nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(*nlh), 0);
+ nlh = nlmsg_put(skb, 0, 0, 0, size, 0);
if (!nlh) {
kfree_skb(skb);
*errp = -ENOMEM;
return NULL;
}
- rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh);
+ rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);
rtm->nfdn_ifindex = rt_skb->dev->ifindex;
ptr = NFDN_RTMSG(rtm);
skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2bc62ea857c8..0eb5d5e76dfb 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1,6 +1,7 @@
/*
* net/dsa/dsa.c - Hardware switch handling
* Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -14,6 +15,9 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <net/dsa.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_platform.h>
#include "dsa_priv.h"
char dsa_driver_version[] = "0.1";
@@ -287,34 +291,239 @@ static struct net_device *dev_to_net_device(struct device *dev)
return NULL;
}
+#ifdef CONFIG_OF
+/*
+ * Fill one entry of @cd's routing table from a "link" phandle target.
+ *
+ * @pd:         platform data being built; pd->nr_chips bounds the table
+ * @cd:         chip whose routing table is updated
+ * @chip_index: index of @cd in pd->chip (currently unused)
+ * @link:       port node referenced by the "link" property
+ *
+ * The second "reg" cell of @link's parent node selects the table slot and
+ * the first "reg" cell of @link itself is stored in it.  The table is
+ * allocated on first use with every slot set to -1.
+ *
+ * Both properties are validated before the table is allocated, and the
+ * table is never freed here: ownership stays with @cd so the caller's
+ * cleanup path frees it exactly once.
+ *
+ * NOTE(review): the stored value is the "reg" of the *remote* port node;
+ * presumably the table should hold the local port that reaches the remote
+ * switch - confirm against the users of cd->rtable.
+ *
+ * Returns 0 on success, -EINVAL on a malformed/out-of-range "reg", or
+ * -ENOMEM if the table cannot be allocated.
+ */
+static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
+				      struct dsa_chip_data *cd,
+				      int chip_index,
+				      struct device_node *link)
+{
+	struct device_node *parent_sw;
+	const __be32 *reg;
+	int link_port_addr;
+	int link_sw_addr;
+	int len;
+	int ret = 0;
+
+	parent_sw = of_get_parent(link);
+	if (!parent_sw)
+		return -EINVAL;
+
+	reg = of_get_property(parent_sw, "reg", &len);
+	if (!reg || (len != sizeof(*reg) * 2)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	link_sw_addr = be32_to_cpup(reg + 1);
+
+	/* a huge cell value turns negative once stored in an int */
+	if (link_sw_addr < 0 || link_sw_addr >= pd->nr_chips) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	reg = of_get_property(link, "reg", NULL);
+	if (!reg) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	link_port_addr = be32_to_cpup(reg);
+
+	/* First time routing table allocation */
+	if (!cd->rtable) {
+		cd->rtable = kmalloc(pd->nr_chips * sizeof(s8), GFP_KERNEL);
+		if (!cd->rtable) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		/* default to no valid uplink/downlink */
+		memset(cd->rtable, -1, pd->nr_chips * sizeof(s8));
+	}
+
+	cd->rtable[link_sw_addr] = link_port_addr;
+
+out:
+	/* drop the reference taken by of_get_parent() */
+	of_node_put(parent_sw);
+	return ret;
+}
+
+/*
+ * Release what hangs off @pd: each chip's port name strings and routing
+ * table, then the chip array itself.  @pd is left for the caller to free.
+ */
+static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
+{
+	int chip, port;
+
+	for (chip = 0; chip < pd->nr_chips; chip++) {
+		struct dsa_chip_data *cd = &pd->chip[chip];
+
+		/* kfree(NULL) is a no-op, so unused slots need no guard */
+		for (port = 0; port < DSA_MAX_PORTS; port++)
+			kfree(cd->port_names[port]);
+
+		kfree(cd->rtable);
+	}
+
+	kfree(pd->chip);
+}
+
+/*
+ * Build a dsa_platform_data for @pdev from its device tree node and store
+ * it in pdev->dev.platform_data.
+ *
+ * The "dsa,mii-bus" and "dsa,ethernet" phandles locate the MDIO bus and
+ * the master ethernet device.  Each available child node takes one slot
+ * of pd->chip; each available grandchild with a "reg" and a "label"
+ * becomes a named port.  Ports labelled "dsa" that carry a "link" phandle
+ * also feed the chip's routing table when there is more than one chip.
+ *
+ * Every child node consumes its own pd->chip slot, and iteration stops
+ * once the (clamped) nr_chips slots are used.  Port indices are
+ * range-checked before they index cd->port_names.
+ *
+ * NOTE(review): the references obtained for the mdio/ethernet nodes, the
+ * MDIO bus and the ethernet platform device are not released on any path
+ * here - confirm where they are expected to be dropped.
+ *
+ * Returns 0 on success or a negative errno; on failure platform_data is
+ * reset to NULL and everything allocated here is freed.
+ */
+static int dsa_of_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *child, *mdio, *ethernet, *port, *link;
+	struct mii_bus *mdio_bus;
+	struct platform_device *ethernet_dev;
+	struct dsa_platform_data *pd;
+	struct dsa_chip_data *cd;
+	const char *port_name;
+	int chip_index, port_index;
+	const __be32 *sw_addr, *port_reg;
+	int ret;
+
+	mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
+	if (!mdio)
+		return -EINVAL;
+
+	mdio_bus = of_mdio_find_bus(mdio);
+	if (!mdio_bus)
+		return -EINVAL;
+
+	ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
+	if (!ethernet)
+		return -EINVAL;
+
+	ethernet_dev = of_find_device_by_node(ethernet);
+	if (!ethernet_dev)
+		return -ENODEV;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return -ENOMEM;
+
+	pdev->dev.platform_data = pd;
+	pd->netdev = &ethernet_dev->dev;
+	pd->nr_chips = of_get_child_count(np);
+	if (pd->nr_chips > DSA_MAX_SWITCHES)
+		pd->nr_chips = DSA_MAX_SWITCHES;
+
+	pd->chip = kzalloc(pd->nr_chips * sizeof(struct dsa_chip_data),
+			   GFP_KERNEL);
+	if (!pd->chip) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	chip_index = -1;
+	for_each_available_child_of_node(np, child) {
+		/* one slot per switch node; never run past the array */
+		chip_index++;
+		if (chip_index >= pd->nr_chips) {
+			of_node_put(child);
+			break;
+		}
+
+		cd = &pd->chip[chip_index];
+
+		cd->mii_bus = &mdio_bus->dev;
+
+		sw_addr = of_get_property(child, "reg", NULL);
+		if (!sw_addr)
+			continue;
+
+		cd->sw_addr = be32_to_cpup(sw_addr);
+		if (cd->sw_addr > PHY_MAX_ADDR)
+			continue;
+
+		for_each_available_child_of_node(child, port) {
+			link = NULL;
+
+			port_reg = of_get_property(port, "reg", NULL);
+			if (!port_reg)
+				continue;
+
+			/* validate before port_index is used as an index */
+			port_index = be32_to_cpup(port_reg);
+			if (port_index < 0 || port_index >= DSA_MAX_PORTS) {
+				of_node_put(port);
+				break;
+			}
+
+			port_name = of_get_property(port, "label", NULL);
+			if (!port_name)
+				continue;
+
+			cd->port_names[port_index] = kstrdup(port_name,
+							     GFP_KERNEL);
+			if (!cd->port_names[port_index]) {
+				ret = -ENOMEM;
+				goto out_put_nodes;
+			}
+
+			link = of_parse_phandle(port, "link", 0);
+
+			if (!strcmp(port_name, "dsa") && link &&
+			    pd->nr_chips > 1) {
+				ret = dsa_of_setup_routing_table(pd, cd,
+						chip_index, link);
+				if (ret)
+					goto out_put_nodes;
+			}
+
+			/* the phandle target is only read, not retained */
+			of_node_put(link);
+		}
+	}
+
+	return 0;
+
+out_put_nodes:
+	/* leaving the iterators early: drop the nodes they still hold */
+	of_node_put(link);
+	of_node_put(port);
+	of_node_put(child);
+out_free_chip:
+	dsa_of_free_platform_data(pd);
+out_free:
+	kfree(pd);
+	pdev->dev.platform_data = NULL;
+	return ret;
+}
+
+/*
+ * Free the platform data attached to @pdev, but only for devices that
+ * have a device tree node; other devices are left untouched.
+ */
+static void dsa_of_remove(struct platform_device *pdev)
+{
+	struct dsa_platform_data *pd;
+
+	if (pdev->dev.of_node == NULL)
+		return;
+
+	pd = pdev->dev.platform_data;
+	dsa_of_free_platform_data(pd);
+	kfree(pd);
+}
+#else
+/* Stub for builds without CONFIG_OF: nothing to parse, always succeeds. */
+static inline int dsa_of_probe(struct platform_device *pdev)
+{
+ return 0;
+}
+
+/* Stub for builds without CONFIG_OF: nothing was allocated, nothing to free. */
+static inline void dsa_of_remove(struct platform_device *pdev)
+{
+}
+#endif
+
static int dsa_probe(struct platform_device *pdev)
{
static int dsa_version_printed;
struct dsa_platform_data *pd = pdev->dev.platform_data;
struct net_device *dev;
struct dsa_switch_tree *dst;
- int i;
+ int i, ret;
if (!dsa_version_printed++)
printk(KERN_NOTICE "Distributed Switch Architecture "
"driver version %s\n", dsa_driver_version);
+ if (pdev->dev.of_node) {
+ ret = dsa_of_probe(pdev);
+ if (ret)
+ return ret;
+
+ pd = pdev->dev.platform_data;
+ }
+
if (pd == NULL || pd->netdev == NULL)
return -EINVAL;
dev = dev_to_net_device(pd->netdev);
- if (dev == NULL)
- return -EINVAL;
+ if (dev == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
if (dev->dsa_ptr != NULL) {
dev_put(dev);
- return -EEXIST;
+ ret = -EEXIST;
+ goto out;
}
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (dst == NULL) {
dev_put(dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
platform_set_drvdata(pdev, dst);
@@ -366,6 +575,11 @@ static int dsa_probe(struct platform_device *pdev)
}
return 0;
+
+out:
+ dsa_of_remove(pdev);
+
+ return ret;
}
static int dsa_remove(struct platform_device *pdev)
@@ -385,6 +599,8 @@ static int dsa_remove(struct platform_device *pdev)
dsa_switch_destroy(ds);
}
+ dsa_of_remove(pdev);
+
return 0;
}
@@ -392,6 +608,12 @@ static void dsa_shutdown(struct platform_device *pdev)
{
}
+/*
+ * Device tree match table: lists the "compatible" string this driver
+ * accepts.  The empty entry terminates the table.
+ */
+static const struct of_device_id dsa_of_match_table[] = {
+ { .compatible = "marvell,dsa", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dsa_of_match_table);
+
static struct platform_driver dsa_driver = {
.probe = dsa_probe,
.remove = dsa_remove,
@@ -399,6 +621,7 @@ static struct platform_driver dsa_driver = {
.driver = {
.name = "dsa",
.owner = THIS_MODULE,
+ .of_match_table = dsa_of_match_table,
},
};
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index a36c85eab5b4..5359560926bc 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
if (netdev_uses_trailer_tags(dev))
return htons(ETH_P_TRAILER);
- if (ntohs(eth->h_proto) >= 1536)
+ if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
return eth->h_proto;
/*
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 43b95ca61114..55e1fd5b3e56 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80};
struct lowpan_dev_info {
struct net_device *real_dev; /* real WPAN device ptr */
struct mutex dev_list_mtx; /* mutex for list ops */
+ unsigned short fragment_tag;
};
struct lowpan_dev_record {
@@ -120,7 +121,6 @@ struct lowpan_fragment {
struct list_head list; /* fragments list */
};
-static unsigned short fragment_tag;
static LIST_HEAD(lowpan_fragments);
static DEFINE_SPINLOCK(flist_lock);
@@ -284,6 +284,9 @@ lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb)
/* checksum is always inline */
memcpy(*hc06_ptr, &uh->check, 2);
*hc06_ptr += 2;
+
+ /* skip the UDP header */
+ skb_pull(skb, sizeof(struct udphdr));
}
static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val)
@@ -309,9 +312,8 @@ static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val)
}
static int
-lowpan_uncompress_udp_header(struct sk_buff *skb)
+lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
{
- struct udphdr *uh = udp_hdr(skb);
u8 tmp;
if (!uh)
@@ -358,6 +360,14 @@ lowpan_uncompress_udp_header(struct sk_buff *skb)
/* copy checksum */
memcpy(&uh->check, &skb->data[0], 2);
skb_pull(skb, 2);
+
+ /*
+ * UDP length needs to be inferred from the lower layers
+ * here, we obtain the hint from the remaining size of the
+ * frame
+ */
+ uh->len = htons(skb->len + sizeof(struct udphdr));
+ pr_debug("uncompressed UDP length: src = %d", uh->len);
} else {
pr_debug("ERROR: unsupported NH format\n");
goto err;
@@ -572,17 +582,31 @@ static int lowpan_header_create(struct sk_buff *skb,
* this isn't implemented in mainline yet, so currently we assign 0xff
*/
{
+ mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
+ mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
+
/* prepare wpan address data */
sa.addr_type = IEEE802154_ADDR_LONG;
- sa.pan_id = 0xff;
-
- da.addr_type = IEEE802154_ADDR_LONG;
- da.pan_id = 0xff;
+ sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
- memcpy(&(da.hwaddr), daddr, 8);
memcpy(&(sa.hwaddr), saddr, 8);
+ /* intra-PAN communications */
+ da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
- mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
+ /*
+ * if the destination address is the broadcast address, use the
+ * corresponding short address
+ */
+ if (lowpan_is_addr_broadcast(daddr)) {
+ da.addr_type = IEEE802154_ADDR_SHORT;
+ da.short_addr = IEEE802154_ADDR_BROADCAST;
+ } else {
+ da.addr_type = IEEE802154_ADDR_LONG;
+ memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN);
+
+ /* request acknowledgment */
+ mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
+ }
return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
type, (void *)&da, (void *)&sa, skb->len);
@@ -650,7 +674,7 @@ static void lowpan_fragment_timer_expired(unsigned long entry_addr)
}
static struct lowpan_fragment *
-lowpan_alloc_new_frame(struct sk_buff *skb, u8 len, u16 tag)
+lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
{
struct lowpan_fragment *frame;
@@ -720,7 +744,7 @@ lowpan_process_data(struct sk_buff *skb)
{
struct lowpan_fragment *frame;
/* slen stores the rightmost 8 bits of the 11 bits length */
- u8 slen, offset;
+ u8 slen, offset = 0;
u16 len, tag;
bool found = false;
@@ -731,6 +755,18 @@ lowpan_process_data(struct sk_buff *skb)
/* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */
len = ((iphc0 & 7) << 8) | slen;
+ if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) {
+ pr_debug("%s received a FRAG1 packet (tag: %d, "
+ "size of the entire IP packet: %d)",
+ __func__, tag, len);
+ } else { /* FRAGN */
+ if (lowpan_fetch_skb_u8(skb, &offset))
+ goto unlock_and_drop;
+ pr_debug("%s received a FRAGN packet (tag: %d, "
+ "size of the entire IP packet: %d, "
+ "offset: %d)", __func__, tag, len, offset * 8);
+ }
+
/*
* check if frame assembling with the same tag is
* already in progress
@@ -745,17 +781,13 @@ lowpan_process_data(struct sk_buff *skb)
/* alloc new frame structure */
if (!found) {
+ pr_debug("%s first fragment received for tag %d, "
+ "begin packet reassembly", __func__, tag);
frame = lowpan_alloc_new_frame(skb, len, tag);
if (!frame)
goto unlock_and_drop;
}
- if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1)
- goto unlock_and_drop;
-
- if (lowpan_fetch_skb_u8(skb, &offset)) /* fetch offset */
- goto unlock_and_drop;
-
/* if payload fits buffer, copy it */
if (likely((offset * 8 + skb->len) <= frame->length))
skb_copy_to_linear_data_offset(frame->skb, offset * 8,
@@ -773,6 +805,9 @@ lowpan_process_data(struct sk_buff *skb)
list_del(&frame->list);
spin_unlock_bh(&flist_lock);
+ pr_debug("%s successfully reassembled fragment "
+ "(tag %d)", __func__, tag);
+
dev_kfree_skb(skb);
skb = frame->skb;
kfree(frame);
@@ -918,10 +953,35 @@ lowpan_process_data(struct sk_buff *skb)
}
/* UDP data uncompression */
- if (iphc0 & LOWPAN_IPHC_NH_C)
- if (lowpan_uncompress_udp_header(skb))
+ if (iphc0 & LOWPAN_IPHC_NH_C) {
+ struct udphdr uh;
+ struct sk_buff *new;
+ if (lowpan_uncompress_udp_header(skb, &uh))
goto drop;
+ /*
+ * replace the compressed UDP head by the uncompressed UDP
+ * header
+ */
+ new = skb_copy_expand(skb, sizeof(struct udphdr),
+ skb_tailroom(skb), GFP_ATOMIC);
+ kfree_skb(skb);
+
+ if (!new)
+ return -ENOMEM;
+
+ skb = new;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr));
+
+ lowpan_raw_dump_table(__func__, "raw UDP header dump",
+ (u8 *)&uh, sizeof(uh));
+
+ hdr.nexthdr = UIP_PROTO_UDP;
+ }
+
/* Not fragmented package */
hdr.payload_len = htons(skb->len);
@@ -969,13 +1029,13 @@ static int lowpan_get_mac_header_length(struct sk_buff *skb)
static int
lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
- int mlen, int plen, int offset)
+ int mlen, int plen, int offset, int type)
{
struct sk_buff *frag;
int hlen, ret;
- /* if payload length is zero, therefore it's a first fragment */
- hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE);
+ hlen = (type == LOWPAN_DISPATCH_FRAG1) ?
+ LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE;
lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen);
@@ -1003,14 +1063,14 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
}
static int
-lowpan_skb_fragmentation(struct sk_buff *skb)
+lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
{
int err, header_length, payload_length, tag, offset = 0;
u8 head[5];
header_length = lowpan_get_mac_header_length(skb);
payload_length = skb->len - header_length;
- tag = fragment_tag++;
+ tag = lowpan_dev_info(dev)->fragment_tag++;
/* first fragment header */
head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7);
@@ -1018,7 +1078,16 @@ lowpan_skb_fragmentation(struct sk_buff *skb)
head[2] = tag >> 8;
head[3] = tag & 0xff;
- err = lowpan_fragment_xmit(skb, head, header_length, 0, 0);
+ err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE,
+ 0, LOWPAN_DISPATCH_FRAG1);
+
+ if (err) {
+ pr_debug("%s unable to send FRAG1 packet (tag: %d)",
+ __func__, tag);
+ goto exit;
+ }
+
+ offset = LOWPAN_FRAG_SIZE;
/* next fragment header */
head[0] &= ~LOWPAN_DISPATCH_FRAG1;
@@ -1033,10 +1102,17 @@ lowpan_skb_fragmentation(struct sk_buff *skb)
len = payload_length - offset;
err = lowpan_fragment_xmit(skb, head, header_length,
- len, offset);
+ len, offset, LOWPAN_DISPATCH_FRAGN);
+ if (err) {
+ pr_debug("%s unable to send a subsequent FRAGN packet "
+ "(tag: %d, offset: %d", __func__, tag, offset);
+ goto exit;
+ }
+
offset += len;
}
+exit:
return err;
}
@@ -1059,14 +1135,14 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
}
pr_debug("frame is too big, fragmentation is needed\n");
- err = lowpan_skb_fragmentation(skb);
+ err = lowpan_skb_fragmentation(skb, dev);
error:
dev_kfree_skb(skb);
out:
- if (err < 0)
+ if (err)
pr_debug("ERROR: xmit failed\n");
- return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK);
+ return (err < 0) ? NET_XMIT_DROP : err;
}
static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
@@ -1087,6 +1163,12 @@ static u16 lowpan_get_short_addr(const struct net_device *dev)
return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
}
+/*
+ * Forward the get_dsn query to the real WPAN device underneath this
+ * lowpan interface and return its answer unchanged.
+ */
+static u8 lowpan_get_dsn(const struct net_device *dev)
+{
+	struct net_device *wpan = lowpan_dev_info(dev)->real_dev;
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(wpan);
+
+	return ops->get_dsn(wpan);
+}
+
static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
@@ -1100,6 +1182,7 @@ static struct ieee802154_mlme_ops lowpan_mlme = {
.get_pan_id = lowpan_get_pan_id,
.get_phy = lowpan_get_phy,
.get_short_addr = lowpan_get_short_addr,
+ .get_dsn = lowpan_get_dsn,
};
static void lowpan_setup(struct net_device *dev)
@@ -1203,6 +1286,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
return -ENODEV;
lowpan_dev_info(dev)->real_dev = real_dev;
+ lowpan_dev_info(dev)->fragment_tag = 0;
mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index bba5f8336317..4b8f917658b5 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -92,9 +92,10 @@
*/
#define lowpan_is_iid_16_bit_compressable(a) \
((((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr16[5]) == 0) && \
- (((a)->s6_addr16[6]) == 0) && \
- ((((a)->s6_addr[14]) & 0x80) == 0))
+ (((a)->s6_addr[10]) == 0) && \
+ (((a)->s6_addr[11]) == 0xff) && \
+ (((a)->s6_addr[12]) == 0xfe) && \
+ (((a)->s6_addr[13]) == 0))
/* multicast address */
#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF)
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index e0da175f8e5b..581a59504bd5 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -291,6 +291,9 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
size_t copied = 0;
int err = -EOPNOTSUPP;
struct sk_buff *skb;
+ struct sockaddr_ieee802154 *saddr;
+
+ saddr = (struct sockaddr_ieee802154 *)msg->msg_name;
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -309,6 +312,13 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
sock_recv_ts_and_drops(msg, sk, skb);
+ if (saddr) {
+ saddr->family = AF_IEEE802154;
+ saddr->addr = mac_cb(skb)->sa;
+ }
+ if (addr_len)
+ *addr_len = sizeof(*saddr);
+
if (flags & MSG_TRUNC)
copied = skb->len;
done:
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 97351e1d07a4..7e49bbcc6967 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -64,8 +64,8 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req)
int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group)
{
- /* XXX: nlh is right at the start of msg */
- void *hdr = genlmsg_data(NLMSG_DATA(msg->data));
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ void *hdr = genlmsg_data(nlmsg_data(nlh));
if (genlmsg_end(msg, hdr) < 0)
goto out;
@@ -97,8 +97,8 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info,
int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
{
- /* XXX: nlh is right at the start of msg */
- void *hdr = genlmsg_data(NLMSG_DATA(msg->data));
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ void *hdr = genlmsg_data(nlmsg_data(nlh));
if (genlmsg_end(msg, hdr) < 0)
goto out;
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 96bb08abece2..b0bdd8c51e9c 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
struct net_device *dev;
struct ieee802154_addr addr;
u8 page;
- int ret = -EINVAL;
+ int ret = -EOPNOTSUPP;
if (!info->attrs[IEEE802154_ATTR_CHANNEL] ||
!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
@@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb,
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->assoc_req)
+ goto out;
if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {
addr.addr_type = IEEE802154_ADDR_LONG;
@@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
page,
nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY]));
+out:
dev_put(dev);
return ret;
}
@@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
{
struct net_device *dev;
struct ieee802154_addr addr;
- int ret = -EINVAL;
+ int ret = -EOPNOTSUPP;
if (!info->attrs[IEEE802154_ATTR_STATUS] ||
!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] ||
@@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->assoc_resp)
+ goto out;
addr.addr_type = IEEE802154_ADDR_LONG;
nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
@@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS]));
+out:
dev_put(dev);
return ret;
}
@@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
{
struct net_device *dev;
struct ieee802154_addr addr;
- int ret = -EINVAL;
+ int ret = -EOPNOTSUPP;
if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] &&
!info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) ||
@@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->disassoc_req)
+ goto out;
if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {
addr.addr_type = IEEE802154_ADDR_LONG;
@@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,
nla_get_u8(info->attrs[IEEE802154_ATTR_REASON]));
+out:
dev_put(dev);
return ret;
}
@@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
u8 channel, bcn_ord, sf_ord;
u8 page;
int pan_coord, blx, coord_realign;
- int ret;
+ int ret = -EOPNOTSUPP;
if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
!info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] ||
@@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->start_req)
+ goto out;
addr.addr_type = IEEE802154_ADDR_SHORT;
addr.short_addr = nla_get_u16(
@@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page,
bcn_ord, sf_ord, pan_coord, blx, coord_realign);
+out:
dev_put(dev);
return ret;
}
@@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
{
struct net_device *dev;
- int ret;
+ int ret = -EOPNOTSUPP;
u8 type;
u32 channels;
u8 duration;
@@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->scan_req)
+ goto out;
type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]);
channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]);
@@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page,
duration);
+out:
dev_put(dev);
return ret;
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7944df768454..8603ca827104 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,6 +166,7 @@ config IP_PNP_RARP
config NET_IPIP
tristate "IP: tunneling"
select INET_TUNNEL
+ select NET_IP_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX
This is helper module to demultiplex GRE packets on GRE version field criteria.
Required by ip_gre and pptp modules.
+config NET_IP_TUNNEL
+ tristate
+ default n
+
config NET_IPGRE
tristate "IP: GRE tunnels over IP"
depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
+ select NET_IP_TUNNEL
help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -313,6 +319,7 @@ config SYN_COOKIES
config NET_IPVTI
tristate "Virtual (secure) IP: tunneling"
select INET_TUNNEL
+ select NET_IP_TUNNEL
depends on INET_XFRM_MODE_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 15ca63ec604e..089cb9f36387 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \
fib_frontend.o fib_semantics.o fib_trie.o \
inet_fragment.o ping.o
+obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c929d9c1c4b6..d01be2a3ae53 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -111,10 +111,10 @@
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
-#include <net/ipip.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
+#include <net/secure_seq.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
@@ -263,8 +263,10 @@ void build_ehash_secret(void)
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
+ if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+ net_secret_init();
+ }
}
EXPORT_SYMBOL(build_ehash_secret);
@@ -1283,9 +1285,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
int ihl;
int id;
unsigned int offset = 0;
-
- if (!(features & NETIF_F_V4_CSUM))
- features &= ~NETIF_F_SG;
+ bool tunnel;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_TCPV4 |
@@ -1293,6 +1293,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_TCPV6 |
+ SKB_GSO_UDP_TUNNEL |
0)))
goto out;
@@ -1307,6 +1309,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
+ tunnel = !!skb->encapsulation;
+
__skb_pull(skb, ihl);
skb_reset_transport_header(skb);
iph = ip_hdr(skb);
@@ -1326,7 +1330,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
skb = segs;
do {
iph = ip_hdr(skb);
- if (proto == IPPROTO_UDP) {
+ if (!tunnel && proto == IPPROTO_UDP) {
iph->id = htons(id);
iph->frag_off = htons(offset >> 3);
if (skb->next != NULL)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index fea4929f6200..247ec1951c35 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
arp_ptr += dev->addr_len;
memcpy(arp_ptr, &src_ip, 4);
arp_ptr += 4;
- if (target_hw != NULL)
- memcpy(arp_ptr, target_hw, dev->addr_len);
- else
- memset(arp_ptr, 0, dev->addr_len);
- arp_ptr += dev->addr_len;
+
+ switch (dev->type) {
+#if IS_ENABLED(CONFIG_FIREWIRE_NET)
+ case ARPHRD_IEEE1394:
+ break;
+#endif
+ default:
+ if (target_hw != NULL)
+ memcpy(arp_ptr, target_hw, dev->addr_len);
+ else
+ memset(arp_ptr, 0, dev->addr_len);
+ arp_ptr += dev->addr_len;
+ }
memcpy(arp_ptr, &dest_ip, 4);
return skb;
@@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb)
arp_ptr += dev->addr_len;
memcpy(&sip, arp_ptr, 4);
arp_ptr += 4;
- arp_ptr += dev->addr_len;
+ switch (dev_type) {
+#if IS_ENABLED(CONFIG_FIREWIRE_NET)
+ case ARPHRD_IEEE1394:
+ break;
+#endif
+ default:
+ arp_ptr += dev->addr_len;
+ }
memcpy(&tip, arp_ptr, 4);
/*
* Check for bad requests for 127.x.x.x and requests for multicast
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index c6287cd978c2..dfc39d4d48b7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
return NULL;
}
-static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -801,7 +801,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
return NULL;
}
-static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa;
@@ -1529,6 +1529,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
+ cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
+ net->dev_base_seq;
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -1549,6 +1551,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_unlock();
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
}
cont:
idx++;
@@ -1760,8 +1763,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
};
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh,
- void *arg)
+ struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
@@ -1821,6 +1823,77 @@ errout:
return err;
}
+/*
+ * Netlink dump callback for RTM_GETNETCONF.
+ *
+ * Emits one RTM_NEWNETCONF message (NLM_F_MULTI) per device that has an
+ * in_device, walking the per-net ifindex hash, then one message for
+ * net->ipv4.devconf_all and one for net->ipv4.devconf_dflt.
+ *
+ * Resume state lives in @cb: args[0] is the hash bucket (or, past
+ * NETDEV_HASHENTRIES, which trailing entry is next) and args[1] is the
+ * position inside the bucket.  Filling stops as soon as
+ * inet_netconf_fill_devconf() returns <= 0; the saved state is then read
+ * back from cb->args on the next invocation.
+ *
+ * Returns skb->len.
+ */
+static int inet_netconf_dump_devconf(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
+ int idx, s_idx;
+ struct net_device *dev;
+ struct in_device *in_dev;
+ struct hlist_head *head;
+
+ /* pick up where the previous call stopped */
+ s_h = cb->args[0];
+ s_idx = idx = cb->args[1];
+
+ /* s_idx only applies to the first bucket visited, hence the reset */
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ rcu_read_lock();
+ /* sequence value checked by nl_dump_check_consistent() below */
+ cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
+ net->dev_base_seq;
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ /* already reported by an earlier call */
+ if (idx < s_idx)
+ goto cont;
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev)
+ goto cont;
+
+ if (inet_netconf_fill_devconf(skb, dev->ifindex,
+ &in_dev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ -1) <= 0) {
+ rcu_read_unlock();
+ goto done;
+ }
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+ }
+ /* all buckets done: h now counts through the two trailing entries */
+ if (h == NETDEV_HASHENTRIES) {
+ if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv4.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ -1) <= 0)
+ goto done;
+ else
+ h++;
+ }
+ if (h == NETDEV_HASHENTRIES + 1) {
+ if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv4.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ -1) <= 0)
+ goto done;
+ else
+ h++;
+ }
+done:
+ /* save the resume point for the next call */
+ cb->args[0] = h;
+ cb->args[1] = idx;
+
+ return skb->len;
+}
+
#ifdef CONFIG_SYSCTL
static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -2225,6 +2298,6 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- NULL, NULL);
+ inet_netconf_dump_devconf, NULL);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb4bb12b3eb4..c7629a209f9d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -604,7 +604,7 @@ errout:
return err;
}
-static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_config cfg;
@@ -626,7 +626,7 @@ errout:
return err;
}
-static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct fib_config cfg;
@@ -957,8 +957,8 @@ static void nl_fib_input(struct sk_buff *skb)
net = sock_net(skb->sk);
nlh = nlmsg_hdr(skb);
- if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
- nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
+ if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+ nlmsg_len(nlh) < sizeof(*frn))
return;
skb = skb_clone(skb, GFP_KERNEL);
@@ -966,7 +966,7 @@ static void nl_fib_input(struct sk_buff *skb)
return;
nlh = nlmsg_hdr(skb);
- frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
+ frn = (struct fib_result_nl *) nlmsg_data(nlh);
tb = fib_get_table(net, frn->tb_id_in);
nl_fib_lookup(frn, tb);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ff06b7543d9f..49616fed9340 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -125,7 +125,6 @@ struct tnode {
unsigned int empty_children; /* KEYLENGTH bits needed */
union {
struct rcu_head rcu;
- struct work_struct work;
struct tnode *tnode_free;
};
struct rt_trie_node __rcu *child[0];
@@ -383,12 +382,6 @@ static struct tnode *tnode_alloc(size_t size)
return vzalloc(size);
}
-static void __tnode_vfree(struct work_struct *arg)
-{
- struct tnode *tn = container_of(arg, struct tnode, work);
- vfree(tn);
-}
-
static void __tnode_free_rcu(struct rcu_head *head)
{
struct tnode *tn = container_of(head, struct tnode, rcu);
@@ -397,10 +390,8 @@ static void __tnode_free_rcu(struct rcu_head *head)
if (size <= PAGE_SIZE)
kfree(tn);
- else {
- INIT_WORK(&tn->work, __tnode_vfree);
- schedule_work(&tn->work);
- }
+ else
+ vfree(tn);
}
static inline void tnode_free(struct tnode *tn)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 7a4c710c4cdd..7856d1651d05 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -27,11 +27,6 @@
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
static DEFINE_SPINLOCK(gre_proto_lock);
-struct gre_base_hdr {
- __be16 flags;
- __be16 protocol;
-};
-#define GRE_HEADER_SECTION 4
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
{
@@ -126,6 +121,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
int ghl = GRE_HEADER_SECTION;
struct gre_base_hdr *greh;
int mac_len = skb->mac_len;
+ __be16 protocol = skb->protocol;
int tnl_hlen;
bool csum;
@@ -154,13 +150,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
csum = false;
/* setup inner skb. */
- if (greh->protocol == htons(ETH_P_TEB)) {
- struct ethhdr *eth = eth_hdr(skb);
- skb->protocol = eth->h_proto;
- } else {
- skb->protocol = greh->protocol;
- }
-
+ skb->protocol = greh->protocol;
skb->encapsulation = 0;
if (unlikely(!pskb_may_pull(skb, ghl)))
@@ -188,7 +178,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
err = __skb_linearize(skb);
if (err) {
- kfree_skb(segs);
+ kfree_skb_list(segs);
segs = ERR_PTR(err);
goto out;
}
@@ -204,6 +194,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
skb->mac_len = mac_len;
+ skb->protocol = protocol;
} while ((skb = skb->next));
out:
return segs;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3ac5dff79627..76e10b47e053 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -881,7 +881,7 @@ int icmp_rcv(struct sk_buff *skb)
case CHECKSUM_NONE:
skb->csum = 0;
if (__skb_checksum_complete(skb))
- goto error;
+ goto csum_error;
}
if (!pskb_pull(skb, sizeof(*icmph)))
@@ -929,6 +929,8 @@ int icmp_rcv(struct sk_buff *skb)
drop:
kfree_skb(skb);
return 0;
+csum_error:
+ ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);
error:
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
goto drop;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 786d97aee751..6acb541c9091 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
{
- int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL);
+ int err = req->rsk_ops->rtx_syn_ack(parent, req);
if (!err)
req->num_retrans++;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7afa2c3c788f..5f648751fce2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
r->idiag_timer = 1;
r->idiag_retrans = icsk->icsk_retransmits;
r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
@@ -322,7 +324,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
}
err = sk_diag_fill(sk, rep, req,
- sk_user_ns(NETLINK_CB(in_skb).ssk),
+ sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, nlh);
if (err < 0) {
@@ -628,7 +630,7 @@ static int inet_csk_diag_dump(struct sock *sk,
return 0;
return inet_csk_diag_fill(sk, skb, r,
- sk_user_ns(NETLINK_CB(cb->skb).ssk),
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -803,7 +805,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
}
err = inet_diag_fill_req(skb, sk, req,
- sk_user_ns(NETLINK_CB(cb->skb).ssk),
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, cb->nlh);
if (err < 0) {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f4fd23de9b13..cec539458307 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -23,6 +23,28 @@
#include <net/sock.h>
#include <net/inet_frag.h>
+#include <net/inet_ecn.h>
+
+/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
+ * Value : 0xff if frame should be dropped.
+ * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
+ */
+const u8 ip_frag_ecn_table[16] = {
+ /* at least one fragment had CE, and others ECT_0 or ECT_1 */
+ [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
+ [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
+ [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
+
+ /* invalid combinations : drop frame */
+ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
+ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
+ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
+ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
+ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
+ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
+ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
+};
+EXPORT_SYMBOL(ip_frag_ecn_table);
static void inet_frag_secret_rebuild(unsigned long dummy)
{
@@ -30,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
unsigned long now = jiffies;
int i;
+ /* Per bucket lock NOT needed here, due to write lock protection */
write_lock(&f->lock);
+
get_random_bytes(&f->rnd, sizeof(u32));
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
struct hlist_node *n;
- hlist_for_each_entry_safe(q, n, &f->hash[i], list) {
+ hb = &f->hash[i];
+ hlist_for_each_entry_safe(q, n, &hb->chain, list) {
unsigned int hval = f->hashfn(q);
if (hval != i) {
+ struct inet_frag_bucket *hb_dest;
+
hlist_del(&q->list);
/* Relink to new hash chain. */
- hlist_add_head(&q->list, &f->hash[hval]);
+ hb_dest = &f->hash[hval];
+ hlist_add_head(&q->list, &hb_dest->chain);
}
}
}
@@ -56,12 +85,15 @@ void inet_frags_init(struct inet_frags *f)
{
int i;
- for (i = 0; i < INETFRAGS_HASHSZ; i++)
- INIT_HLIST_HEAD(&f->hash[i]);
+ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_bucket *hb = &f->hash[i];
+ spin_lock_init(&hb->chain_lock);
+ INIT_HLIST_HEAD(&hb->chain);
+ }
rwlock_init(&f->lock);
- f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+ f->rnd = (u32) ((totalram_pages ^ (totalram_pages >> 7)) ^
(jiffies ^ (jiffies >> 6)));
setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
@@ -100,10 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net);
static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
- write_lock(&f->lock);
+ struct inet_frag_bucket *hb;
+ unsigned int hash;
+
+ read_lock(&f->lock);
+ hash = f->hashfn(fq);
+ hb = &f->hash[hash];
+
+ spin_lock(&hb->chain_lock);
hlist_del(&fq->list);
- fq->net->nqueues--;
- write_unlock(&f->lock);
+ spin_unlock(&hb->chain_lock);
+
+ read_unlock(&f->lock);
inet_frag_lru_del(fq);
}
@@ -182,6 +222,9 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
q = list_first_entry(&nf->lru_list,
struct inet_frag_queue, lru_list);
atomic_inc(&q->refcnt);
+ /* Remove q from list to avoid several CPUs grabbing it */
+ list_del_init(&q->lru_list);
+
spin_unlock(&nf->lru_lock);
spin_lock(&q->lock);
@@ -202,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in, struct inet_frags *f,
void *arg)
{
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
#endif
unsigned int hash;
- write_lock(&f->lock);
+ read_lock(&f->lock); /* Protects against hash rebuild */
/*
* While we stayed w/o the lock other CPU could update
* the rnd seed, so we need to re-calculate the hash
* chain. Fortunatelly the qp_in can be used to get one.
*/
hash = f->hashfn(qp_in);
+ hb = &f->hash[hash];
+ spin_lock(&hb->chain_lock);
+
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
* such entry could be created on other cpu, while we
- * promoted read lock to write lock.
+ * released the hash bucket lock.
*/
- hlist_for_each_entry(qp, &f->hash[hash], list) {
+ hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
- write_unlock(&f->lock);
+ spin_unlock(&hb->chain_lock);
+ read_unlock(&f->lock);
qp_in->last_in |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
@@ -234,9 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &f->hash[hash]);
- nf->nqueues++;
- write_unlock(&f->lock);
+ hlist_add_head(&qp->list, &hb->chain);
+ spin_unlock(&hb->chain_lock);
+ read_unlock(&f->lock);
inet_frag_lru_add(nf, qp);
return qp;
}
@@ -257,6 +305,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
+ INIT_LIST_HEAD(&q->lru_list);
return q;
}
@@ -277,17 +326,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash)
__releases(&f->lock)
{
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
int depth = 0;
- hlist_for_each_entry(q, &f->hash[hash], list) {
+ hb = &f->hash[hash];
+
+ spin_lock(&hb->chain_lock);
+ hlist_for_each_entry(q, &hb->chain, list) {
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
+ spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
return q;
}
depth++;
}
+ spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
if (depth <= INETFRAGS_MAXDEPTH)
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index cc280a3f4f96..1975f52933c5 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/if_vlan.h>
#include <linux/inet_lro.h>
+#include <net/checksum.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
@@ -114,11 +115,9 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
*(p+2) = lro_desc->tcp_rcv_tsecr;
}
+ csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
iph->tot_len = htons(lro_desc->ip_tot_len);
- iph->check = 0;
- iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
-
tcph->check = 0;
tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 52c273ea05c3..b66910aaef4d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -79,40 +79,11 @@ struct ipq {
struct inet_peer *peer;
};
-/* RFC 3168 support :
- * We want to check ECN values of all fragments, do detect invalid combinations.
- * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
- */
-#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
-#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
-#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
-#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
-
static inline u8 ip4_frag_ecn(u8 tos)
{
return 1 << (tos & INET_ECN_MASK);
}
-/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
- * Value : 0xff if frame should be dropped.
- * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
- */
-static const u8 ip4_frag_ecn_table[16] = {
- /* at least one fragment had CE, and others ECT_0 or ECT_1 */
- [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
- [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
- [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
-
- /* invalid combinations : drop frame */
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
- [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
-};
-
static struct inet_frags ip4_frags;
int ip_frag_nqueues(struct net *net)
@@ -557,7 +528,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
ipq_kill(qp);
- ecn = ip4_frag_ecn_table[qp->ecn];
+ ecn = ip_frag_ecn_table[qp->ecn];
if (unlikely(ecn == 0xff)) {
err = -EINVAL;
goto out_fail;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 91d66dbde9c0..2a83591492dd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -37,7 +37,7 @@
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
@@ -108,15 +108,6 @@
fatal route to network, even if it were you who configured
fatal static route: you are innocent. :-)
-
-
- 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
- practically identical code. It would be good to glue them
- together, but it is not very evident, how to make them modular.
- sit is integral part of IPv6, ipip and gre are naturally modular.
- We could extract common parts (hash table, ioctl etc)
- to a separate module (ip_tunnel.c).
-
Alexey Kuznetsov.
*/
@@ -126,400 +117,137 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
-static void ipgre_tunnel_setup(struct net_device *dev);
-static int ipgre_tunnel_bind_dev(struct net_device *dev);
-
-/* Fallback tunnel: no source, no destination, no key, no options */
-
-#define HASH_SIZE 16
static int ipgre_net_id __read_mostly;
-struct ipgre_net {
- struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
-
- struct net_device *fb_tunnel_dev;
-};
-
-/* Tunnel hash table */
-
-/*
- 4 hash tables:
-
- 3: (remote,local)
- 2: (remote,*)
- 1: (*,local)
- 0: (*,*)
+static int gre_tap_net_id __read_mostly;
- We require exact key match i.e. if a key is present in packet
- it will match only tunnel with the same key; if it is not present,
- it will match only keyless tunnel.
-
- All keysless packets, if not matched configured keyless tunnels
- will match fallback tunnel.
- */
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+ __sum16 csum = 0;
-#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ csum = csum_fold(skb->csum);
-#define tunnels_r_l tunnels[3]
-#define tunnels_r tunnels[2]
-#define tunnels_l tunnels[1]
-#define tunnels_wc tunnels[0]
+ if (!csum)
+ break;
+ /* Fall through. */
-static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
+ case CHECKSUM_NONE:
+ skb->csum = 0;
+ csum = __skb_checksum_complete(skb);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ break;
}
- tot->multicast = dev->stats.multicast;
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- return tot;
+ return csum;
}
-/* Does key in tunnel parameters match packet */
-static bool ipgre_key_match(const struct ip_tunnel_parm *p,
- __be16 flags, __be32 key)
+static int ip_gre_calc_hlen(__be16 o_flags)
{
- if (p->i_flags & GRE_KEY) {
- if (flags & GRE_KEY)
- return key == p->i_key;
- else
- return false; /* key expected, none present */
- } else
- return !(flags & GRE_KEY);
+ int addend = 4;
+
+ if (o_flags&TUNNEL_CSUM)
+ addend += 4;
+ if (o_flags&TUNNEL_KEY)
+ addend += 4;
+ if (o_flags&TUNNEL_SEQ)
+ addend += 4;
+ return addend;
}
-/* Given src, dst and key, find appropriate for input tunnel. */
-
-static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
- __be32 remote, __be32 local,
- __be16 flags, __be32 key,
- __be16 gre_proto)
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err, int *hdr_len)
{
- struct net *net = dev_net(dev);
- int link = dev->ifindex;
- unsigned int h0 = HASH(remote);
- unsigned int h1 = HASH(key);
- struct ip_tunnel *t, *cand = NULL;
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
- int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
- ARPHRD_ETHER : ARPHRD_IPGRE;
- int score, cand_score = 4;
-
- for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
- if (local != t->parms.iph.saddr ||
- remote != t->parms.iph.daddr ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!ipgre_key_match(&t->parms, flags, key))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
+ unsigned int ip_hlen = ip_hdrlen(skb);
+ const struct gre_base_hdr *greh;
+ __be32 *options;
- for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
- if (remote != t->parms.iph.daddr ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!ipgre_key_match(&t->parms, flags, key))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+ return -EINVAL;
- for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
- if ((local != t->parms.iph.saddr &&
- (local != t->parms.iph.daddr ||
- !ipv4_is_multicast(local))) ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!ipgre_key_match(&t->parms, flags, key))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+ if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+ return -EINVAL;
- for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
- if (t->parms.i_key != key ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (t->dev->type != ARPHRD_IPGRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
+ tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ *hdr_len = ip_gre_calc_hlen(tpi->flags);
- if (cand != NULL)
- return cand;
+ if (!pskb_may_pull(skb, *hdr_len))
+ return -EINVAL;
- dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
- return netdev_priv(dev);
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
- return NULL;
-}
+ tpi->proto = greh->protocol;
-static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
- struct ip_tunnel_parm *parms)
-{
- __be32 remote = parms->iph.daddr;
- __be32 local = parms->iph.saddr;
- __be32 key = parms->i_key;
- unsigned int h = HASH(key);
- int prio = 0;
-
- if (local)
- prio |= 1;
- if (remote && !ipv4_is_multicast(remote)) {
- prio |= 2;
- h ^= HASH(remote);
+ options = (__be32 *)(greh + 1);
+ if (greh->flags & GRE_CSUM) {
+ if (check_checksum(skb)) {
+ *csum_err = true;
+ return -EINVAL;
+ }
+ options++;
}
- return &ign->tunnels[prio][h];
-}
-
-static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
- struct ip_tunnel *t)
-{
- return __ipgre_bucket(ign, &t->parms);
-}
-
-static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
-{
- struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
+ if (greh->flags & GRE_KEY) {
+ tpi->key = *options;
+ options++;
+ } else
+ tpi->key = 0;
- rcu_assign_pointer(t->next, rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
-}
+ if (unlikely(greh->flags & GRE_SEQ)) {
+ tpi->seq = *options;
+ options++;
+ } else
+ tpi->seq = 0;
-static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
-{
- struct ip_tunnel __rcu **tp;
- struct ip_tunnel *iter;
-
- for (tp = ipgre_bucket(ign, t);
- (iter = rtnl_dereference(*tp)) != NULL;
- tp = &iter->next) {
- if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
- break;
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ tpi->proto = htons(ETH_P_IP);
+ if ((*(u8 *)options & 0xF0) != 0x40) {
+ *hdr_len += 4;
+ if (!pskb_may_pull(skb, *hdr_len))
+ return -EINVAL;
}
}
-}
-
-static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
- struct ip_tunnel_parm *parms,
- int type)
-{
- __be32 remote = parms->iph.daddr;
- __be32 local = parms->iph.saddr;
- __be32 key = parms->i_key;
- int link = parms->link;
- struct ip_tunnel *t;
- struct ip_tunnel __rcu **tp;
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
-
- for (tp = __ipgre_bucket(ign, parms);
- (t = rtnl_dereference(*tp)) != NULL;
- tp = &t->next)
- if (local == t->parms.iph.saddr &&
- remote == t->parms.iph.daddr &&
- key == t->parms.i_key &&
- link == t->parms.link &&
- type == t->dev->type)
- break;
-
- return t;
-}
-
-static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
- struct ip_tunnel_parm *parms, int create)
-{
- struct ip_tunnel *t, *nt;
- struct net_device *dev;
- char name[IFNAMSIZ];
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
-
- t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
- if (t || !create)
- return t;
-
- if (parms->name[0])
- strlcpy(name, parms->name, IFNAMSIZ);
- else
- strcpy(name, "gre%d");
-
- dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
- if (!dev)
- return NULL;
-
- dev_net_set(dev, net);
-
- nt = netdev_priv(dev);
- nt->parms = *parms;
- dev->rtnl_link_ops = &ipgre_link_ops;
-
- dev->mtu = ipgre_tunnel_bind_dev(dev);
-
- if (register_netdevice(dev) < 0)
- goto failed_free;
- /* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & GRE_SEQ))
- dev->features |= NETIF_F_LLTX;
-
- dev_hold(dev);
- ipgre_tunnel_link(ign, nt);
- return nt;
-
-failed_free:
- free_netdev(dev);
- return NULL;
-}
-
-static void ipgre_tunnel_uninit(struct net_device *dev)
-{
- struct net *net = dev_net(dev);
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
-
- ipgre_tunnel_unlink(ign, netdev_priv(dev));
- dev_put(dev);
+ return 0;
}
-
static void ipgre_err(struct sk_buff *skb, u32 info)
{
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
-
- Moreover, Cisco "wise men" put GRE key to the third word
- in GRE header. It makes impossible maintaining even soft state for keyed
- GRE tunnels with enabled checksum. Tell them "thank you".
-
- Well, I wonder, rfc1812 was written by Cisco employee,
- what the hell these idiots break standards established
- by themselves???
- */
-
- const struct iphdr *iph = (const struct iphdr *)skb->data;
- __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
- int grehlen = (iph->ihl<<2) + 4;
+ /* All the routers (except for Linux) return only
+ 8 bytes of packet payload. It means, that precise relaying of
+ ICMP in the real Internet is absolutely infeasible.
+
+ Moreover, Cisco "wise men" put GRE key to the third word
+ in GRE header. It makes impossible maintaining even soft
+ state for keyed GRE tunnels with enabled checksum. Tell
+ them "thank you".
+
+ Well, I wonder, rfc1812 was written by Cisco employee,
+ what the hell these idiots break standards established
+ by themselves???
+ */
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn;
+ const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
- __be16 flags;
- __be32 key = 0;
+ struct tnl_ptk_info tpi;
+ int hdr_len;
+ bool csum_err = false;
- flags = p[0];
- if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
- if (flags&(GRE_VERSION|GRE_ROUTING))
+ if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
+ if (!csum_err) /* ignore csum errors. */
return;
- if (flags&GRE_KEY) {
- grehlen += 4;
- if (flags&GRE_CSUM)
- grehlen += 4;
- }
}
- /* If only 8 bytes returned, keyed message will be dropped here */
- if (skb_headlen(skb) < grehlen)
- return;
-
- if (flags & GRE_KEY)
- key = *(((__be32 *)p) + (grehlen / 4) - 1);
-
switch (type) {
default:
case ICMP_PARAMETERPROB:
@@ -548,8 +276,14 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
break;
}
- t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
- flags, key, p[1]);
+ if (tpi.proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
+
+ iph = (const struct iphdr *)skb->data;
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
+ iph->daddr, iph->saddr, tpi.key);
if (t == NULL)
return;
@@ -578,158 +312,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
t->err_time = jiffies;
}
-static inline u8
-ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
-{
- u8 inner = 0;
- if (skb->protocol == htons(ETH_P_IP))
- inner = old_iph->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
- return INET_ECN_encapsulate(tos, inner);
-}
-
static int ipgre_rcv(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn;
const struct iphdr *iph;
- u8 *h;
- __be16 flags;
- __sum16 csum = 0;
- __be32 key = 0;
- u32 seqno = 0;
struct ip_tunnel *tunnel;
- int offset = 4;
- __be16 gre_proto;
- int err;
+ struct tnl_ptk_info tpi;
+ int hdr_len;
+ bool csum_err = false;
- if (!pskb_may_pull(skb, 16))
+ if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
goto drop;
- iph = ip_hdr(skb);
- h = skb->data;
- flags = *(__be16 *)h;
-
- if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
- /* - Version must be 0.
- - We do not support routing headers.
- */
- if (flags&(GRE_VERSION|GRE_ROUTING))
- goto drop;
-
- if (flags&GRE_CSUM) {
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
- if (!csum)
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
- offset += 4;
- }
- if (flags&GRE_KEY) {
- key = *(__be32 *)(h + offset);
- offset += 4;
- }
- if (flags&GRE_SEQ) {
- seqno = ntohl(*(__be32 *)(h + offset));
- offset += 4;
- }
- }
+ if (tpi.proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
- gre_proto = *(__be16 *)(h + 2);
+ iph = ip_hdr(skb);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
+ iph->saddr, iph->daddr, tpi.key);
- tunnel = ipgre_tunnel_lookup(skb->dev,
- iph->saddr, iph->daddr, flags, key,
- gre_proto);
if (tunnel) {
- struct pcpu_tstats *tstats;
-
- secpath_reset(skb);
-
- skb->protocol = gre_proto;
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IP
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
- skb->protocol = htons(ETH_P_IP);
- if ((*(h + offset) & 0xF0) != 0x40)
- offset += 4;
- }
-
- skb->mac_header = skb->network_header;
- __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
- skb->pkt_type = PACKET_HOST;
-#ifdef CONFIG_NET_IPGRE_BROADCAST
- if (ipv4_is_multicast(iph->daddr)) {
- /* Looped back packet, drop it! */
- if (rt_is_output_route(skb_rtable(skb)))
- goto drop;
- tunnel->dev->stats.multicast++;
- skb->pkt_type = PACKET_BROADCAST;
- }
-#endif
-
- if (((flags&GRE_CSUM) && csum) ||
- (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
- tunnel->dev->stats.rx_crc_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
- if (tunnel->parms.i_flags&GRE_SEQ) {
- if (!(flags&GRE_SEQ) ||
- (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
- tunnel->dev->stats.rx_fifo_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
- tunnel->i_seqno = seqno + 1;
- }
-
- /* Warning: All skb pointers will be invalidated! */
- if (tunnel->dev->type == ARPHRD_ETHER) {
- if (!pskb_may_pull(skb, ETH_HLEN)) {
- tunnel->dev->stats.rx_length_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
-
- iph = ip_hdr(skb);
- skb->protocol = eth_type_trans(skb, tunnel->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
- }
-
- __skb_tunnel_rx(skb, tunnel->dev);
-
- skb_reset_network_header(skb);
- err = IP_ECN_decapsulate(iph, skb);
- if (unlikely(err)) {
- if (log_ecn_error)
- net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
- &iph->saddr, iph->tos);
- if (err > 1) {
- ++tunnel->dev->stats.rx_frame_errors;
- ++tunnel->dev->stats.rx_errors;
- goto drop;
- }
- }
-
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- gro_cells_receive(&tunnel->gro_cells, skb);
+ ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
return 0;
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-
drop:
kfree_skb(skb);
return 0;
@@ -746,7 +355,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff
skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
return skb;
} else if (skb->ip_summed == CHECKSUM_PARTIAL &&
- tunnel->parms.o_flags&GRE_CSUM) {
+ tunnel->parms.o_flags&TUNNEL_CSUM) {
err = skb_checksum_help(skb);
if (unlikely(err))
goto error;
@@ -760,494 +369,157 @@ error:
return ERR_PTR(err);
}
-static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *gre_build_header(struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi,
+ int hdr_len)
{
- struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
- struct ip_tunnel *tunnel = netdev_priv(dev);
- const struct iphdr *old_iph;
- const struct iphdr *tiph;
- struct flowi4 fl4;
- u8 tos;
- __be16 df;
- struct rtable *rt; /* Route to the other host */
- struct net_device *tdev; /* Device to other host */
- struct iphdr *iph; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
- int gre_hlen;
- __be32 dst;
- int mtu;
- u8 ttl;
- int err;
- int pkt_len;
-
- skb = handle_offloads(tunnel, skb);
- if (IS_ERR(skb)) {
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
- }
+ struct gre_base_hdr *greh;
- if (!skb->encapsulation) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb_push(skb, hdr_len);
- old_iph = ip_hdr(skb);
+ greh = (struct gre_base_hdr *)skb->data;
+ greh->flags = tnl_flags_to_gre_flags(tpi->flags);
+ greh->protocol = tpi->proto;
- if (dev->type == ARPHRD_ETHER)
- IPCB(skb)->flags = 0;
+ if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
- if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
- gre_hlen = 0;
- tiph = (const struct iphdr *)skb->data;
- } else {
- gre_hlen = tunnel->hlen;
- tiph = &tunnel->parms.iph;
- }
-
- if ((dst = tiph->daddr) == 0) {
- /* NBMA tunnel */
-
- if (skb_dst(skb) == NULL) {
- dev->stats.tx_fifo_errors++;
- goto tx_error;
+ if (tpi->flags&TUNNEL_SEQ) {
+ *ptr = tpi->seq;
+ ptr--;
}
-
- if (skb->protocol == htons(ETH_P_IP)) {
- rt = skb_rtable(skb);
- dst = rt_nexthop(rt, old_iph->daddr);
+ if (tpi->flags&TUNNEL_KEY) {
+ *ptr = tpi->key;
+ ptr--;
}
-#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- const struct in6_addr *addr6;
- struct neighbour *neigh;
- bool do_tx_error_icmp;
- int addr_type;
-
- neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
- if (neigh == NULL)
- goto tx_error;
-
- addr6 = (const struct in6_addr *)&neigh->primary_key;
- addr_type = ipv6_addr_type(addr6);
-
- if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &ipv6_hdr(skb)->daddr;
- addr_type = ipv6_addr_type(addr6);
- }
-
- if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
- do_tx_error_icmp = true;
- else {
- do_tx_error_icmp = false;
- dst = addr6->s6_addr32[3];
- }
- neigh_release(neigh);
- if (do_tx_error_icmp)
- goto tx_error_icmp;
+ if (tpi->flags&TUNNEL_CSUM &&
+ !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+ *(__sum16 *)ptr = 0;
+ *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+ skb->len, 0));
}
-#endif
- else
- goto tx_error;
}
- ttl = tiph->ttl;
- tos = tiph->tos;
- if (tos & 0x1) {
- tos &= ~0x1;
- if (skb->protocol == htons(ETH_P_IP))
- tos = old_iph->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
- tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
- }
+ return skb;
+}
- rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
- tunnel->parms.o_key, RT_TOS(tos),
- tunnel->parms.link);
- if (IS_ERR(rt)) {
- dev->stats.tx_carrier_errors++;
- goto tx_error;
- }
- tdev = rt->dst.dev;
+static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
+ const struct iphdr *tnl_params,
+ __be16 proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct tnl_ptk_info tpi;
- if (tdev == dev) {
- ip_rt_put(rt);
- dev->stats.collisions++;
- goto tx_error;
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
}
- df = tiph->frag_off;
- if (df)
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
- else
- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
-
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ tpi.flags = tunnel->parms.o_flags;
+ tpi.proto = proto;
+ tpi.key = tunnel->parms.o_key;
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+ tpi.seq = htonl(tunnel->o_seqno);
- if (skb->protocol == htons(ETH_P_IP)) {
- df |= (old_iph->frag_off&htons(IP_DF));
-
- if (!skb_is_gso(skb) &&
- (old_iph->frag_off&htons(IP_DF)) &&
- mtu < ntohs(old_iph->tot_len)) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- ip_rt_put(rt);
- goto tx_error;
- }
- }
-#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
-
- if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
- if ((tunnel->parms.iph.daddr &&
- !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
- rt6->rt6i_dst.plen == 128) {
- rt6->rt6i_flags |= RTF_MODIFIED;
- dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
- }
- }
-
- if (!skb_is_gso(skb) &&
- mtu >= IPV6_MIN_MTU &&
- mtu < skb->len - tunnel->hlen + gre_hlen) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- ip_rt_put(rt);
- goto tx_error;
- }
+ /* Push GRE header. */
+ skb = gre_build_header(skb, &tpi, tunnel->hlen);
+ if (unlikely(!skb)) {
+ dev->stats.tx_dropped++;
+ return;
}
-#endif
-
- if (tunnel->err_count > 0) {
- if (time_before(jiffies,
- tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
- tunnel->err_count--;
- dst_link_failure(skb);
- } else
- tunnel->err_count = 0;
- }
+ ip_tunnel_xmit(skb, dev, tnl_params);
+}
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
-
- if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
- (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
- struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
- if (max_headroom > dev->needed_headroom)
- dev->needed_headroom = max_headroom;
- if (!new_skb) {
- ip_rt_put(rt);
- dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
- }
- if (skb->sk)
- skb_set_owner_w(new_skb, skb->sk);
- dev_kfree_skb(skb);
- skb = new_skb;
- old_iph = ip_hdr(skb);
- /* Warning : tiph value might point to freed memory */
- }
+static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *tnl_params;
- skb_push(skb, gre_hlen);
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(*iph));
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
- /*
- * Push down and install the IPIP header.
- */
+ skb = handle_offloads(tunnel, skb);
+ if (IS_ERR(skb))
+ goto out;
- iph = ip_hdr(skb);
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
- iph->protocol = IPPROTO_GRE;
- iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
- iph->daddr = fl4.daddr;
- iph->saddr = fl4.saddr;
- iph->ttl = ttl;
-
- tunnel_ip_select_ident(skb, old_iph, &rt->dst);
-
- if (ttl == 0) {
- if (skb->protocol == htons(ETH_P_IP))
- iph->ttl = old_iph->ttl;
-#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6))
- iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
-#endif
- else
- iph->ttl = ip4_dst_hoplimit(&rt->dst);
- }
+ if (dev->header_ops) {
+ /* Need space for new headers */
+ if (skb_cow_head(skb, dev->needed_headroom -
+ (tunnel->hlen + sizeof(struct iphdr))))
+ goto free_skb;
- ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
- ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : skb->protocol;
+ tnl_params = (const struct iphdr *)skb->data;
- if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
+ /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
+ * to gre header.
+ */
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ } else {
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
- if (tunnel->parms.o_flags&GRE_SEQ) {
- ++tunnel->o_seqno;
- *ptr = htonl(tunnel->o_seqno);
- ptr--;
- }
- if (tunnel->parms.o_flags&GRE_KEY) {
- *ptr = tunnel->parms.o_key;
- ptr--;
- }
- /* Skip GRE checksum if skb is getting offloaded. */
- if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
- (tunnel->parms.o_flags&GRE_CSUM)) {
- int offset = skb_transport_offset(skb);
-
- if (skb_has_shared_frag(skb)) {
- err = __skb_linearize(skb);
- if (err)
- goto tx_error;
- }
-
- *ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
- skb->len - offset,
- 0));
- }
+ tnl_params = &tunnel->parms.iph;
}
- nf_reset(skb);
+ __gre_xmit(skb, dev, tnl_params, skb->protocol);
- pkt_len = skb->len - skb_transport_offset(skb);
- err = ip_local_out(skb);
- if (likely(net_xmit_eval(err) == 0)) {
- u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
- u64_stats_update_end(&tstats->syncp);
- } else {
- dev->stats.tx_errors++;
- dev->stats.tx_aborted_errors++;
- }
return NETDEV_TX_OK;
-#if IS_ENABLED(CONFIG_IPV6)
-tx_error_icmp:
- dst_link_failure(skb);
-#endif
-tx_error:
- dev->stats.tx_errors++;
+free_skb:
dev_kfree_skb(skb);
+out:
+ dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
-static int ipgre_tunnel_bind_dev(struct net_device *dev)
+static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
- struct net_device *tdev = NULL;
- struct ip_tunnel *tunnel;
- const struct iphdr *iph;
- int hlen = LL_MAX_HEADER;
- int mtu = ETH_DATA_LEN;
- int addend = sizeof(struct iphdr) + 4;
-
- tunnel = netdev_priv(dev);
- iph = &tunnel->parms.iph;
-
- /* Guess output device to choose reasonable mtu and needed_headroom */
-
- if (iph->daddr) {
- struct flowi4 fl4;
- struct rtable *rt;
-
- rt = ip_route_output_gre(dev_net(dev), &fl4,
- iph->daddr, iph->saddr,
- tunnel->parms.o_key,
- RT_TOS(iph->tos),
- tunnel->parms.link);
- if (!IS_ERR(rt)) {
- tdev = rt->dst.dev;
- ip_rt_put(rt);
- }
-
- if (dev->type != ARPHRD_ETHER)
- dev->flags |= IFF_POINTOPOINT;
- }
+ struct ip_tunnel *tunnel = netdev_priv(dev);
- if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+ skb = handle_offloads(tunnel, skb);
+ if (IS_ERR(skb))
+ goto out;
- if (tdev) {
- hlen = tdev->hard_header_len + tdev->needed_headroom;
- mtu = tdev->mtu;
- }
- dev->iflink = tunnel->parms.link;
-
- /* Precalculate GRE options length */
- if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
- if (tunnel->parms.o_flags&GRE_CSUM)
- addend += 4;
- if (tunnel->parms.o_flags&GRE_KEY)
- addend += 4;
- if (tunnel->parms.o_flags&GRE_SEQ)
- addend += 4;
- }
- dev->needed_headroom = addend + hlen;
- mtu -= dev->hard_header_len + addend;
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
- if (mtu < 68)
- mtu = 68;
+ __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
- tunnel->hlen = addend;
- /* TCP offload with GRE SEQ is not supported. */
- if (!(tunnel->parms.o_flags & GRE_SEQ)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
+ return NETDEV_TX_OK;
- return mtu;
+free_skb:
+ dev_kfree_skb(skb);
+out:
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
}
-static int
-ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+static int ipgre_tunnel_ioctl(struct net_device *dev,
+ struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip_tunnel_parm p;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
-
- switch (cmd) {
- case SIOCGETTUNNEL:
- t = NULL;
- if (dev == ign->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
- err = -EFAULT;
- break;
- }
- t = ipgre_tunnel_locate(net, &p, 0);
- }
- if (t == NULL)
- t = netdev_priv(dev);
- memcpy(&p, &t->parms, sizeof(p));
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- err = -EFAULT;
- break;
- case SIOCADDTUNNEL:
- case SIOCCHGTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
-
- err = -EINVAL;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
- ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
- goto done;
- if (p.iph.ttl)
- p.iph.frag_off |= htons(IP_DF);
-
- if (!(p.i_flags&GRE_KEY))
- p.i_key = 0;
- if (!(p.o_flags&GRE_KEY))
- p.o_key = 0;
-
- t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
-
- if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
- if (t != NULL) {
- if (t->dev != dev) {
- err = -EEXIST;
- break;
- }
- } else {
- unsigned int nflags = 0;
-
- t = netdev_priv(dev);
-
- if (ipv4_is_multicast(p.iph.daddr))
- nflags = IFF_BROADCAST;
- else if (p.iph.daddr)
- nflags = IFF_POINTOPOINT;
-
- if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
- err = -EINVAL;
- break;
- }
- ipgre_tunnel_unlink(ign, t);
- synchronize_net();
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- t->parms.i_key = p.i_key;
- t->parms.o_key = p.o_key;
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- ipgre_tunnel_link(ign, t);
- netdev_state_change(dev);
- }
- }
-
- if (t) {
- err = 0;
- if (cmd == SIOCCHGTUNNEL) {
- t->parms.iph.ttl = p.iph.ttl;
- t->parms.iph.tos = p.iph.tos;
- t->parms.iph.frag_off = p.iph.frag_off;
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- dev->mtu = ipgre_tunnel_bind_dev(dev);
- netdev_state_change(dev);
- }
- }
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
- err = -EFAULT;
- } else
- err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
- break;
-
- case SIOCDELTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- if (dev == ign->fb_tunnel_dev) {
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
- err = -ENOENT;
- if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
- goto done;
- err = -EPERM;
- if (t == netdev_priv(ign->fb_tunnel_dev))
- goto done;
- dev = t->dev;
- }
- unregister_netdevice(dev);
- err = 0;
- break;
-
- default:
- err = -EINVAL;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ return -EFAULT;
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
+ p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
+ ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
+ return -EINVAL;
}
+ p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
+ p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
-done:
- return err;
-}
+ err = ip_tunnel_ioctl(dev, &p, cmd);
+ if (err)
+ return err;
-static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
- return -EINVAL;
- dev->mtu = new_mtu;
+ p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
+ p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
+
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ return -EFAULT;
return 0;
}
@@ -1277,25 +549,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
...
ftp fec0:6666:6666::193.233.7.65
...
-
*/
-
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
const void *daddr, const void *saddr, unsigned int len)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
- __be16 *p = (__be16 *)(iph+1);
+ struct iphdr *iph;
+ struct gre_base_hdr *greh;
- memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
- p[0] = t->parms.o_flags;
- p[1] = htons(type);
+ iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
+ greh = (struct gre_base_hdr *)(iph+1);
+ greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
+ greh->protocol = htons(type);
- /*
- * Set the source hardware address.
- */
+ memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
+ /* Set the source hardware address. */
if (saddr)
memcpy(&iph->saddr, saddr, 4);
if (daddr)
@@ -1303,7 +573,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
if (iph->daddr)
return t->hlen;
- return -t->hlen;
+ return -(t->hlen + sizeof(*iph));
}
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
@@ -1357,31 +627,21 @@ static int ipgre_close(struct net_device *dev)
}
return 0;
}
-
#endif
static const struct net_device_ops ipgre_netdev_ops = {
.ndo_init = ipgre_tunnel_init,
- .ndo_uninit = ipgre_tunnel_uninit,
+ .ndo_uninit = ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
.ndo_open = ipgre_open,
.ndo_stop = ipgre_close,
#endif
- .ndo_start_xmit = ipgre_tunnel_xmit,
+ .ndo_start_xmit = ipgre_xmit,
.ndo_do_ioctl = ipgre_tunnel_ioctl,
- .ndo_change_mtu = ipgre_tunnel_change_mtu,
- .ndo_get_stats64 = ipgre_get_stats64,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
-static void ipgre_dev_free(struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- gro_cells_destroy(&tunnel->gro_cells);
- free_percpu(dev->tstats);
- free_netdev(dev);
-}
-
#define GRE_FEATURES (NETIF_F_SG | \
NETIF_F_FRAGLIST | \
NETIF_F_HIGHDMA | \
@@ -1390,35 +650,48 @@ static void ipgre_dev_free(struct net_device *dev)
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
- dev->destructor = ipgre_dev_free;
+ ip_tunnel_setup(dev, ipgre_net_id);
+}
- dev->type = ARPHRD_IPGRE;
- dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+static void __gre_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel;
+
+ tunnel = netdev_priv(dev);
+ tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->parms.iph.protocol = IPPROTO_GRE;
+
+ dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
- dev->flags = IFF_NOARP;
- dev->iflink = 0;
- dev->addr_len = 4;
- dev->features |= NETIF_F_NETNS_LOCAL;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
- dev->features |= GRE_FEATURES;
+ dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
+
+ if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
+ /* TCP offload with GRE SEQ is not supported. */
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ /* Can use a lockless transmit, unless we generate
+ * output sequences
+ */
+ dev->features |= NETIF_F_LLTX;
+ }
}
static int ipgre_tunnel_init(struct net_device *dev)
{
- struct ip_tunnel *tunnel;
- struct iphdr *iph;
- int err;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct iphdr *iph = &tunnel->parms.iph;
- tunnel = netdev_priv(dev);
- iph = &tunnel->parms.iph;
+ __gre_tunnel_init(dev);
- tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
+ memcpy(dev->dev_addr, &iph->saddr, 4);
+ memcpy(dev->broadcast, &iph->daddr, 4);
- memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
- memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+ dev->type = ARPHRD_IPGRE;
+ dev->flags = IFF_NOARP;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ dev->addr_len = 4;
if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -1432,106 +705,30 @@ static int ipgre_tunnel_init(struct net_device *dev)
} else
dev->header_ops = &ipgre_header_ops;
- dev->tstats = alloc_percpu(struct pcpu_tstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- err = gro_cells_init(&tunnel->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
- return err;
- }
-
- return 0;
-}
-
-static void ipgre_fb_tunnel_init(struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct iphdr *iph = &tunnel->parms.iph;
-
- tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
-
- iph->version = 4;
- iph->protocol = IPPROTO_GRE;
- iph->ihl = 5;
- tunnel->hlen = sizeof(struct iphdr) + 4;
-
- dev_hold(dev);
+ return ip_tunnel_init(dev);
}
-
static const struct gre_protocol ipgre_protocol = {
.handler = ipgre_rcv,
.err_handler = ipgre_err,
};
-static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
-{
- int prio;
-
- for (prio = 0; prio < 4; prio++) {
- int h;
- for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t;
-
- t = rtnl_dereference(ign->tunnels[prio][h]);
-
- while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
- t = rtnl_dereference(t->next);
- }
- }
- }
-}
-
static int __net_init ipgre_init_net(struct net *net)
{
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
- int err;
-
- ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
- ipgre_tunnel_setup);
- if (!ign->fb_tunnel_dev) {
- err = -ENOMEM;
- goto err_alloc_dev;
- }
- dev_net_set(ign->fb_tunnel_dev, net);
-
- ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
- ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
-
- if ((err = register_netdev(ign->fb_tunnel_dev)))
- goto err_reg_dev;
-
- rcu_assign_pointer(ign->tunnels_wc[0],
- netdev_priv(ign->fb_tunnel_dev));
- return 0;
-
-err_reg_dev:
- ipgre_dev_free(ign->fb_tunnel_dev);
-err_alloc_dev:
- return err;
+ return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
static void __net_exit ipgre_exit_net(struct net *net)
{
- struct ipgre_net *ign;
- LIST_HEAD(list);
-
- ign = net_generic(net, ipgre_net_id);
- rtnl_lock();
- ipgre_destroy_tunnels(ign, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
+ ip_tunnel_delete_net(itn);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
.exit = ipgre_exit_net,
.id = &ipgre_net_id,
- .size = sizeof(struct ipgre_net),
+ .size = sizeof(struct ip_tunnel_net),
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1576,8 +773,8 @@ out:
return ipgre_tunnel_validate(tb, data);
}
-static void ipgre_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
+ struct ip_tunnel_parm *parms)
{
memset(parms, 0, sizeof(*parms));
@@ -1590,10 +787,10 @@ static void ipgre_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1617,148 +814,46 @@ static void ipgre_netlink_parms(struct nlattr *data[],
parms->iph.frag_off = htons(IP_DF);
}
-static int ipgre_tap_init(struct net_device *dev)
+static int gre_tap_init(struct net_device *dev)
{
- struct ip_tunnel *tunnel;
-
- tunnel = netdev_priv(dev);
-
- tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
+ __gre_tunnel_init(dev);
- ipgre_tunnel_bind_dev(dev);
-
- dev->tstats = alloc_percpu(struct pcpu_tstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- return 0;
+ return ip_tunnel_init(dev);
}
-static const struct net_device_ops ipgre_tap_netdev_ops = {
- .ndo_init = ipgre_tap_init,
- .ndo_uninit = ipgre_tunnel_uninit,
- .ndo_start_xmit = ipgre_tunnel_xmit,
+static const struct net_device_ops gre_tap_netdev_ops = {
+ .ndo_init = gre_tap_init,
+ .ndo_uninit = ip_tunnel_uninit,
+ .ndo_start_xmit = gre_tap_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_change_mtu = ipgre_tunnel_change_mtu,
- .ndo_get_stats64 = ipgre_get_stats64,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void ipgre_tap_setup(struct net_device *dev)
{
-
ether_setup(dev);
-
- dev->netdev_ops = &ipgre_tap_netdev_ops;
- dev->destructor = ipgre_dev_free;
-
- dev->iflink = 0;
- dev->features |= NETIF_F_NETNS_LOCAL;
-
- dev->features |= GRE_FEATURES;
- dev->hw_features |= GRE_FEATURES;
+ dev->netdev_ops = &gre_tap_netdev_ops;
+ ip_tunnel_setup(dev, gre_tap_net_id);
}
-static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[])
+static int ipgre_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
{
- struct ip_tunnel *nt;
- struct net *net = dev_net(dev);
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
- int mtu;
- int err;
-
- nt = netdev_priv(dev);
- ipgre_netlink_parms(data, &nt->parms);
-
- if (ipgre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
-
- if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
- eth_hw_addr_random(dev);
-
- mtu = ipgre_tunnel_bind_dev(dev);
- if (!tb[IFLA_MTU])
- dev->mtu = mtu;
-
- /* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & GRE_SEQ))
- dev->features |= NETIF_F_LLTX;
-
- err = register_netdevice(dev);
- if (err)
- goto out;
-
- dev_hold(dev);
- ipgre_tunnel_link(ign, nt);
+ struct ip_tunnel_parm p;
-out:
- return err;
+ ipgre_netlink_parms(data, tb, &p);
+ return ip_tunnel_newlink(dev, tb, &p);
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip_tunnel *t, *nt;
- struct net *net = dev_net(dev);
- struct ipgre_net *ign = net_generic(net, ipgre_net_id);
struct ip_tunnel_parm p;
- int mtu;
-
- if (dev == ign->fb_tunnel_dev)
- return -EINVAL;
-
- nt = netdev_priv(dev);
- ipgre_netlink_parms(data, &p);
-
- t = ipgre_tunnel_locate(net, &p, 0);
-
- if (t) {
- if (t->dev != dev)
- return -EEXIST;
- } else {
- t = nt;
-
- if (dev->type != ARPHRD_ETHER) {
- unsigned int nflags = 0;
-
- if (ipv4_is_multicast(p.iph.daddr))
- nflags = IFF_BROADCAST;
- else if (p.iph.daddr)
- nflags = IFF_POINTOPOINT;
-
- if ((dev->flags ^ nflags) &
- (IFF_POINTOPOINT | IFF_BROADCAST))
- return -EINVAL;
- }
- ipgre_tunnel_unlink(ign, t);
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- t->parms.i_key = p.i_key;
- if (dev->type != ARPHRD_ETHER) {
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- }
- ipgre_tunnel_link(ign, t);
- netdev_state_change(dev);
- }
-
- t->parms.o_key = p.o_key;
- t->parms.iph.ttl = p.iph.ttl;
- t->parms.iph.tos = p.iph.tos;
- t->parms.iph.frag_off = p.iph.frag_off;
-
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- mtu = ipgre_tunnel_bind_dev(dev);
- if (!tb[IFLA_MTU])
- dev->mtu = mtu;
- netdev_state_change(dev);
- }
-
- return 0;
+ ipgre_netlink_parms(data, tb, &p);
+ return ip_tunnel_changelink(dev, tb, &p);
}
static size_t ipgre_get_size(const struct net_device *dev)
@@ -1793,8 +888,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel_parm *p = &t->parms;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
- nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
- nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
@@ -1832,6 +927,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
.validate = ipgre_tunnel_validate,
.newlink = ipgre_newlink,
.changelink = ipgre_changelink,
+ .dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
.fill_info = ipgre_fill_info,
};
@@ -1845,13 +941,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
.validate = ipgre_tap_validate,
.newlink = ipgre_newlink,
.changelink = ipgre_changelink,
+ .dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
.fill_info = ipgre_fill_info,
};
-/*
- * And now the modules code and kernel interface.
- */
+static int __net_init ipgre_tap_init_net(struct net *net)
+{
+ return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
+}
+
+static void __net_exit ipgre_tap_exit_net(struct net *net)
+{
+ struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
+ ip_tunnel_delete_net(itn);
+}
+
+static struct pernet_operations ipgre_tap_net_ops = {
+ .init = ipgre_tap_init_net,
+ .exit = ipgre_tap_exit_net,
+ .id = &gre_tap_net_id,
+ .size = sizeof(struct ip_tunnel_net),
+};
static int __init ipgre_init(void)
{
@@ -1863,6 +974,10 @@ static int __init ipgre_init(void)
if (err < 0)
return err;
+ err = register_pernet_device(&ipgre_tap_net_ops);
+ if (err < 0)
+ goto pnet_tap_faied;
+
err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
pr_info("%s: can't add protocol\n", __func__);
@@ -1877,16 +992,17 @@ static int __init ipgre_init(void)
if (err < 0)
goto tap_ops_failed;
-out:
- return err;
+ return 0;
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
+ unregister_pernet_device(&ipgre_tap_net_ops);
+pnet_tap_faied:
unregister_pernet_device(&ipgre_net_ops);
- goto out;
+ return err;
}
static void __exit ipgre_fini(void)
@@ -1895,6 +1011,7 @@ static void __exit ipgre_fini(void)
rtnl_link_unregister(&ipgre_link_ops);
if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
pr_info("%s: can't remove protocol\n", __func__);
+ unregister_pernet_device(&ipgre_tap_net_ops);
unregister_pernet_device(&ipgre_net_ops);
}
@@ -1904,3 +1021,4 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
+MODULE_ALIAS_NETDEV("gretap0");
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2bdf802e28e2..3da817b89e9b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -419,7 +419,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
- goto inhdr_error;
+ goto csum_error;
len = ntohs(iph->tot_len);
if (skb->len < len) {
@@ -446,6 +446,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
ip_rcv_finish);
+csum_error:
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS);
inhdr_error:
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5e12dca7b3dd..4bcabf3ab4ca 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -84,7 +84,7 @@ int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
/* Generate a checksum for an outgoing IP datagram. */
-__inline__ void ip_send_check(struct iphdr *iph)
+void ip_send_check(struct iphdr *iph)
{
iph->check = 0;
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
@@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
to->nf_trace = from->nf_trace;
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
new file mode 100644
index 000000000000..7fa8f08fa7ae
--- /dev/null
+++ b/net/ipv4/ip_tunnel.c
@@ -0,0 +1,1035 @@
+/*
+ * Copyright (c) 2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/rculist.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/ip_tunnels.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#endif
+
+/* Hash the XOR of @key and @remote down to IP_TNL_HASH_BITS bits.
+ * The result is used as an index into itn->tunnels by the callers in
+ * this file; @itn itself is not read here.
+ */
+static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
+ __be32 key, __be32 remote)
+{
+ u32 mixed = (__force u32)key;
+
+ mixed ^= (__force u32)remote;
+ return hash_32(mixed, IP_TNL_HASH_BITS);
+}
+
+/* Often modified stats are per cpu, other are shared (netdev->stats).
+ *
+ * Fill @tot for @dev: packet and byte counters from every possible CPU's
+ * dev->tstats entry are added into @tot; the error, multicast and
+ * collision counters listed below are copied from dev->stats.
+ * Returns @tot.
+ */
+struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ /* Take a snapshot of the four counters and repeat it for as
+ * long as u64_stats_fetch_retry_bh() asks for a retry.
+ */
+ do {
+ start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ }
+
+ tot->multicast = dev->stats.multicast;
+
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ tot->collisions = dev->stats.collisions;
+
+ return tot;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
+
+/* Check a packet's key state against tunnel parameters @p.
+ * A tunnel with TUNNEL_KEY in p->i_flags matches only packets that carry
+ * TUNNEL_KEY in @flags and the same @key; a tunnel without it matches
+ * only packets that carry no key.
+ */
+static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
+ __be16 flags, __be32 key)
+{
+ bool pkt_keyed = (flags & TUNNEL_KEY) != 0;
+
+ if (!(p->i_flags & TUNNEL_KEY))
+ return !pkt_keyed;
+
+ /* key expected: it must be present and identical */
+ return pkt_keyed && key == p->i_key;
+}
+
+/* Fallback tunnel: no source, no destination, no key, no options.
+
+ Tunnel hash table:
+ We require an exact key match, i.e. if a key is present in the packet
+ it will match only a tunnel with the same key; if it is not present,
+ it will match only a keyless tunnel.
+
+ All keyless packets that do not match a configured keyless tunnel
+ will match the fallback tunnel.
+ Given src, dst and key, find the appropriate tunnel for input.
+
+ The lookup makes up to four passes over two hash buckets. In every
+ pass only devices with IFF_UP are considered, a tunnel whose
+ parms.link equals @link is returned at once, and any other match is
+ kept as a candidate that is returned only if no pass finds a
+ link-exact tunnel. If there is no candidate either, the fallback
+ device's tunnel is returned when that device exists and is up,
+ otherwise NULL.
+*/
+struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
+ int link, __be16 flags,
+ __be32 remote, __be32 local,
+ __be32 key)
+{
+ unsigned int hash;
+ struct ip_tunnel *t, *cand = NULL;
+ struct hlist_head *head;
+
+ hash = ip_tunnel_hash(itn, key, remote);
+ head = &itn->tunnels[hash];
+
+ /* Pass 1: bucket (key, remote); local and remote must both match. */
+ hlist_for_each_entry_rcu(t, head, hash_node) {
+ if (local != t->parms.iph.saddr ||
+ remote != t->parms.iph.daddr ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (!ip_tunnel_key_match(&t->parms, flags, key))
+ continue;
+
+ if (t->parms.link == link)
+ return t;
+ else
+ cand = t; /* unconditional: the last such match wins */
+ }
+
+ /* Pass 2: same bucket; only the remote address is compared. */
+ hlist_for_each_entry_rcu(t, head, hash_node) {
+ if (remote != t->parms.iph.daddr ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (!ip_tunnel_key_match(&t->parms, flags, key))
+ continue;
+
+ if (t->parms.link == link)
+ return t;
+ else if (!cand)
+ cand = t;
+ }
+
+ hash = ip_tunnel_hash(itn, key, 0);
+ head = &itn->tunnels[hash];
+
+ /* Pass 3: bucket (key, 0); local must equal the tunnel's saddr, or
+ * be a multicast address equal to the tunnel's daddr.
+ */
+ hlist_for_each_entry_rcu(t, head, hash_node) {
+ if ((local != t->parms.iph.saddr &&
+ (local != t->parms.iph.daddr ||
+ !ipv4_is_multicast(local))) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (!ip_tunnel_key_match(&t->parms, flags, key))
+ continue;
+
+ if (t->parms.link == link)
+ return t;
+ else if (!cand)
+ cand = t;
+ }
+
+ if (flags & TUNNEL_NO_KEY)
+ goto skip_key_lookup;
+
+ /* Pass 4: bucket (key, 0) again; only i_key is compared. */
+ hlist_for_each_entry_rcu(t, head, hash_node) {
+ if (t->parms.i_key != key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->parms.link == link)
+ return t;
+ else if (!cand)
+ cand = t;
+ }
+
+skip_key_lookup:
+ if (cand)
+ return cand;
+
+ if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
+ return netdev_priv(itn->fb_tunnel_dev);
+
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
+
+/* Select the hash bucket for @parms. The bucket is keyed on i_key and
+ * the remote address, where an unset or multicast remote hashes as 0.
+ */
+static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
+ struct ip_tunnel_parm *parms)
+{
+ __be32 daddr = parms->iph.daddr;
+ __be32 remote = 0;
+
+ if (daddr && !ipv4_is_multicast(daddr))
+ remote = daddr;
+
+ return &itn->tunnels[ip_tunnel_hash(itn, parms->i_key, remote)];
+}
+
+/* Insert @t at the head of the bucket that ip_bucket() picks for its
+ * current parms.
+ */
+static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
+{
+ hlist_add_head_rcu(&t->hash_node, ip_bucket(itn, &t->parms));
+}
+
+/* Unlink @t from its hash bucket using hlist_del_init_rcu(). */
+static void ip_tunnel_del(struct ip_tunnel *t)
+{
+ hlist_del_init_rcu(&t->hash_node);
+}
+
+/* Search the bucket chosen by ip_bucket() for a tunnel whose local and
+ * remote addresses, input key and link equal those in @parms and whose
+ * device type equals @type. Returns the loop cursor after the walk.
+ * NOTE(review): the not-found result presumably relies on
+ * hlist_for_each_entry_rcu() leaving the cursor NULL once the list is
+ * exhausted - confirm against the iterator's definition.
+ */
+static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
+ struct ip_tunnel_parm *parms,
+ int type)
+{
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
+ __be32 key = parms->i_key;
+ int link = parms->link;
+ struct ip_tunnel *t = NULL;
+ struct hlist_head *head = ip_bucket(itn, parms);
+
+ hlist_for_each_entry_rcu(t, head, hash_node) {
+ if (local == t->parms.iph.saddr &&
+ remote == t->parms.iph.daddr &&
+ key == t->parms.i_key &&
+ link == t->parms.link &&
+ type == t->dev->type)
+ break;
+ }
+ return t;
+}
+
+/* Allocate and register a tunnel net_device in @net with link ops @ops.
+ * The device name is parms->name when set, otherwise ops->kind with "%d"
+ * appended. @parms is copied into the device's private ip_tunnel before
+ * registration. ASSERT_RTNL() is invoked before the allocation.
+ *
+ * Returns the new device, or an ERR_PTR() carrying -E2BIG (kind longer
+ * than IFNAMSIZ - 3), -ENOMEM (allocation failed) or the error returned
+ * by register_netdevice().
+ */
+static struct net_device *__ip_tunnel_create(struct net *net,
+ const struct rtnl_link_ops *ops,
+ struct ip_tunnel_parm *parms)
+{
+ int err;
+ struct ip_tunnel *tunnel;
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+
+ if (parms->name[0])
+ strlcpy(name, parms->name, IFNAMSIZ);
+ else {
+ /* leave room for the two-character "%d" suffix and the NUL */
+ if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
+ err = -E2BIG;
+ goto failed;
+ }
+ strlcpy(name, ops->kind, IFNAMSIZ);
+ strncat(name, "%d", 2);
+ }
+
+ ASSERT_RTNL();
+ dev = alloc_netdev(ops->priv_size, name, ops->setup);
+ if (!dev) {
+ err = -ENOMEM;
+ goto failed;
+ }
+ dev_net_set(dev, net);
+
+ dev->rtnl_link_ops = ops;
+
+ tunnel = netdev_priv(dev);
+ tunnel->parms = *parms;
+
+ err = register_netdevice(dev);
+ if (err)
+ goto failed_free;
+
+ return dev;
+
+failed_free:
+ free_netdev(dev);
+failed:
+ return ERR_PTR(err);
+}
+
+/* Zero @fl4, fill it from the arguments and return the result of
+ * ip_route_output_key() for it in @net.
+ */
+static inline struct rtable *ip_route_output_tunnel(struct net *net,
+ struct flowi4 *fl4,
+ int proto,
+ __be32 daddr, __be32 saddr,
+ __be32 key, __u8 tos, int oif)
+{
+ memset(fl4, 0, sizeof(*fl4));
+
+ /* addresses */
+ fl4->saddr = saddr;
+ fl4->daddr = daddr;
+
+ /* protocol, tunnel key, TOS and output interface */
+ fl4->flowi4_proto = proto;
+ fl4->fl4_gre_key = key;
+ fl4->flowi4_tos = tos;
+ fl4->flowi4_oif = oif;
+
+ return ip_route_output_key(net, fl4);
+}
+
+/* Derive needed_headroom, iflink and an MTU for tunnel device @dev.
+ * The underlying device is taken from a route lookup towards the
+ * configured remote address when one is set, otherwise from parms.link.
+ * Sets dev->iflink and dev->needed_headroom, and sets IFF_POINTOPOINT on
+ * non-Ethernet devices that have a remote address.
+ * Returns the computed MTU (never below 68); the caller decides whether
+ * to store it in dev->mtu.
+ */
+static int ip_tunnel_bind_dev(struct net_device *dev)
+{
+ struct net_device *tdev = NULL;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *iph;
+ int hlen = LL_MAX_HEADER;
+ int mtu = ETH_DATA_LEN;
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ iph = &tunnel->parms.iph;
+
+ /* Guess output device to choose reasonable mtu and needed_headroom */
+ if (iph->daddr) {
+ struct flowi4 fl4;
+ struct rtable *rt;
+
+ rt = ip_route_output_tunnel(dev_net(dev), &fl4,
+ tunnel->parms.iph.protocol,
+ iph->daddr, iph->saddr,
+ tunnel->parms.o_key,
+ RT_TOS(iph->tos),
+ tunnel->parms.link);
+ if (!IS_ERR(rt)) {
+ tdev = rt->dst.dev;
+ ip_rt_put(rt);
+ }
+ if (dev->type != ARPHRD_ETHER)
+ dev->flags |= IFF_POINTOPOINT;
+ }
+
+ /* No route found: fall back to the explicitly configured link. */
+ if (!tdev && tunnel->parms.link)
+ tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+
+ /* Without an underlying device the LL_MAX_HEADER and ETH_DATA_LEN
+ * defaults set above stay in effect.
+ */
+ if (tdev) {
+ hlen = tdev->hard_header_len + tdev->needed_headroom;
+ mtu = tdev->mtu;
+ }
+ dev->iflink = tunnel->parms.link;
+
+ dev->needed_headroom = t_hlen + hlen;
+ mtu -= (dev->hard_header_len + t_hlen);
+
+ if (mtu < 68)
+ mtu = 68;
+
+ return mtu;
+}
+
+/* Create a tunnel device in @net from @parms, using the rtnl_link_ops of
+ * the fallback device, set its MTU from ip_tunnel_bind_dev() and insert
+ * it into @itn's hash table.
+ * Returns the new tunnel, or NULL if device creation failed (the
+ * specific error from __ip_tunnel_create() is not propagated).
+ * BUGs if @itn has no fallback device.
+ */
+static struct ip_tunnel *ip_tunnel_create(struct net *net,
+ struct ip_tunnel_net *itn,
+ struct ip_tunnel_parm *parms)
+{
+ struct ip_tunnel *nt;
+ struct net_device *dev;
+
+ BUG_ON(!itn->fb_tunnel_dev);
+ dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+ if (IS_ERR(dev))
+ return NULL;
+
+ dev->mtu = ip_tunnel_bind_dev(dev);
+
+ nt = netdev_priv(dev);
+ ip_tunnel_add(itn, nt);
+ return nt;
+}
+
+/* Receive path for a packet that has been matched to @tunnel.
+ * Strips tunnel->hlen bytes, checks the packet's checksum and sequence
+ * flags in @tpi against the tunnel's i_flags, runs eth_type_trans() for
+ * ARPHRD_ETHER devices, applies ECN decapsulation, updates the per-cpu
+ * rx counters and hands the skb to gro_cells_receive().
+ * @log_ecn_error enables a rate-limited log line when ECN decapsulation
+ * reports an error.
+ * Always returns 0; a rejected packet is counted in dev->stats and freed.
+ */
+int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi, bool log_ecn_error)
+{
+ struct pcpu_tstats *tstats;
+ const struct iphdr *iph = ip_hdr(skb);
+ int err;
+
+ secpath_reset(skb);
+
+ skb->protocol = tpi->proto;
+
+ skb->mac_header = skb->network_header;
+ __pskb_pull(skb, tunnel->hlen);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+ if (ipv4_is_multicast(iph->daddr)) {
+ /* Looped back packet, drop it! */
+ if (rt_is_output_route(skb_rtable(skb)))
+ goto drop;
+ tunnel->dev->stats.multicast++;
+ skb->pkt_type = PACKET_BROADCAST;
+ }
+#endif
+
+ /* Checksum presence must agree between packet and tunnel config. */
+ if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
+ ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+
+ /* With sequencing enabled, drop packets that lack a sequence number
+ * or whose number is behind the next expected one (i_seqno).
+ */
+ if (tunnel->parms.i_flags&TUNNEL_SEQ) {
+ if (!(tpi->flags&TUNNEL_SEQ) ||
+ (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+ tunnel->i_seqno = ntohl(tpi->seq) + 1;
+ }
+
+ /* Warning: All skb pointers will be invalidated! */
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+
+ /* re-read: pskb_may_pull() may have moved the header */
+ iph = ip_hdr(skb);
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
+
+ /* NOTE(review): this unconditional assignment overwrites the
+ * PACKET_BROADCAST set in the multicast branch above (and presumably
+ * any pkt_type chosen by eth_type_trans()) - confirm this is intended.
+ */
+ skb->pkt_type = PACKET_HOST;
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ skb_reset_network_header(skb);
+ err = IP_ECN_decapsulate(iph, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &iph->saddr, iph->tos);
+ /* only results above 1 cause a drop; 1 is logged and accepted */
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
+ }
+ }
+
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ gro_cells_receive(&tunnel->gro_cells, skb);
+ return 0;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
+
+/* Encapsulate @skb in an outer IPv4 header and send it with
+ * iptunnel_xmit().
+ * @dev is the tunnel device; @tnl_params supplies the outer header
+ * template (daddr, saddr, tos, ttl, frag_off, protocol).
+ *
+ * A zero template daddr selects NBMA mode, where the outer destination
+ * is derived from the inner packet's route (IPv4) or neighbour entry
+ * (IPv6). A template tos with the low bit set, or a template ttl of 0,
+ * means the value is taken from the inner header.
+ *
+ * Returns nothing. On any failure the skb is freed and a counter in
+ * dev->stats is incremented; the route reference is released on every
+ * failure path taken after the lookup succeeds.
+ */
+void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ const struct iphdr *tnl_params)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *inner_iph;
+ struct iphdr *iph;
+ struct flowi4 fl4;
+ u8 tos, ttl;
+ __be16 df;
+ struct rtable *rt; /* Route to the other host */
+ struct net_device *tdev; /* Device to other host */
+ unsigned int max_headroom; /* The extra header space needed */
+ __be32 dst;
+ int mtu;
+
+ inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ dst = tnl_params->daddr;
+ if (dst == 0) {
+ /* NBMA tunnel */
+
+ if (skb_dst(skb) == NULL) {
+ dev->stats.tx_fifo_errors++;
+ goto tx_error;
+ }
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ rt = skb_rtable(skb);
+ dst = rt_nexthop(rt, inner_iph->daddr);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ const struct in6_addr *addr6;
+ struct neighbour *neigh;
+ bool do_tx_error_icmp;
+ int addr_type;
+
+ neigh = dst_neigh_lookup(skb_dst(skb),
+ &ipv6_hdr(skb)->daddr);
+ if (neigh == NULL)
+ goto tx_error;
+
+ addr6 = (const struct in6_addr *)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
+
+ if (addr_type == IPV6_ADDR_ANY) {
+ addr6 = &ipv6_hdr(skb)->daddr;
+ addr_type = ipv6_addr_type(addr6);
+ }
+
+ /* only IPv4-compatible addresses yield an outer daddr */
+ if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+ do_tx_error_icmp = true;
+ else {
+ do_tx_error_icmp = false;
+ dst = addr6->s6_addr32[3];
+ }
+ neigh_release(neigh);
+ if (do_tx_error_icmp)
+ goto tx_error_icmp;
+ }
+#endif
+ else
+ goto tx_error;
+ }
+
+ tos = tnl_params->tos;
+ if (tos & 0x1) {
+ /* low bit set: take the TOS from the inner header */
+ tos &= ~0x1;
+ if (skb->protocol == htons(ETH_P_IP))
+ tos = inner_iph->tos;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+ }
+
+ rt = ip_route_output_tunnel(dev_net(dev), &fl4,
+ tunnel->parms.iph.protocol,
+ dst, tnl_params->saddr,
+ tunnel->parms.o_key,
+ RT_TOS(tos),
+ tunnel->parms.link);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error;
+ }
+ tdev = rt->dst.dev;
+
+ /* Route points back at this tunnel device: refuse to loop. */
+ if (tdev == dev) {
+ ip_rt_put(rt);
+ dev->stats.collisions++;
+ goto tx_error;
+ }
+
+ df = tnl_params->frag_off;
+
+ if (df)
+ mtu = dst_mtu(&rt->dst) - dev->hard_header_len
+ - sizeof(struct iphdr);
+ else
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ df |= (inner_iph->frag_off&htons(IP_DF));
+
+ if (!skb_is_gso(skb) &&
+ (inner_iph->frag_off&htons(IP_DF)) &&
+ mtu < ntohs(inner_iph->tot_len)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+
+ if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
+ mtu >= IPV6_MIN_MTU) {
+ if ((tunnel->parms.iph.daddr &&
+ !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+ rt6->rt6i_dst.plen == 128) {
+ rt6->rt6i_flags |= RTF_MODIFIED;
+ dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
+ }
+ }
+
+ if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
+ mtu < skb->len) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+ }
+#endif
+
+ /* While recorded errors are still within IPTUNNEL_ERR_TIMEO, consume
+ * one per packet and signal link failure; otherwise clear the count.
+ */
+ if (tunnel->err_count > 0) {
+ if (time_before(jiffies,
+ tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
+ tunnel->err_count--;
+
+ dst_link_failure(skb);
+ } else
+ tunnel->err_count = 0;
+ }
+
+ ttl = tnl_params->ttl;
+ if (ttl == 0) {
+ /* ttl 0: take it from the inner header, else from the route */
+ if (skb->protocol == htons(ETH_P_IP))
+ ttl = inner_iph->ttl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
+#endif
+ else
+ ttl = ip4_dst_hoplimit(&rt->dst);
+ }
+
+ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
+ + rt->dst.header_len;
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+
+ /* Ensure headroom for every packet, not only when needed_headroom
+ * has just grown: the skb_push() below needs room for the outer
+ * header regardless. On failure drop the route reference taken by
+ * the lookup above, which would otherwise be leaked.
+ * NOTE(review): tnl_params is still read after this call; if a caller
+ * passes a pointer into the skb's own data, confirm it stays valid
+ * when the head is reallocated.
+ */
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ ip_rt_put(rt);
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return;
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->dst);
+
+ /* Push down and install the IP header. */
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+
+ iph = ip_hdr(skb);
+ /* re-read: skb_cow_head() may have reallocated the header */
+ inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->frag_off = df;
+ iph->protocol = tnl_params->protocol;
+ iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
+ iph->daddr = fl4.daddr;
+ iph->saddr = fl4.saddr;
+ iph->ttl = ttl;
+ tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
+
+ iptunnel_xmit(skb, dev);
+ return;
+
+#if IS_ENABLED(CONFIG_IPV6)
+tx_error_icmp:
+ dst_link_failure(skb);
+#endif
+tx_error:
+ dev->stats.tx_errors++;
+ dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
+
+/* Apply new parameters @p to tunnel @t.
+ * Addresses and keys feed the hash, so @t is unhashed before they change
+ * and re-hashed afterwards. For non-Ethernet devices the device and
+ * broadcast addresses of @dev are set from the new local and remote
+ * addresses. ttl, tos and frag_off are copied as well. If the link
+ * changed, the device is re-bound and, when @set_mtu is true, dev->mtu is
+ * replaced by the newly computed MTU. Ends with netdev_state_change().
+ */
+static void ip_tunnel_update(struct ip_tunnel_net *itn,
+ struct ip_tunnel *t,
+ struct net_device *dev,
+ struct ip_tunnel_parm *p,
+ bool set_mtu)
+{
+ ip_tunnel_del(t);
+ t->parms.iph.saddr = p->iph.saddr;
+ t->parms.iph.daddr = p->iph.daddr;
+ t->parms.i_key = p->i_key;
+ t->parms.o_key = p->o_key;
+ if (dev->type != ARPHRD_ETHER) {
+ memcpy(dev->dev_addr, &p->iph.saddr, 4);
+ memcpy(dev->broadcast, &p->iph.daddr, 4);
+ }
+ ip_tunnel_add(itn, t);
+
+ t->parms.iph.ttl = p->iph.ttl;
+ t->parms.iph.tos = p->iph.tos;
+ t->parms.iph.frag_off = p->iph.frag_off;
+
+ if (t->parms.link != p->link) {
+ int mtu;
+
+ t->parms.link = p->link;
+ mtu = ip_tunnel_bind_dev(dev);
+ if (set_mtu)
+ dev->mtu = mtu;
+ }
+ netdev_state_change(dev);
+}
+
+/* Handle the tunnel ioctls for @dev; @p carries the tunnel parameters.
+ *
+ * SIOCGETTUNNEL: copy a tunnel's parms into @p. On the fallback device
+ * the tunnel is looked up from @p first; otherwise, or if
+ * nothing is found, @dev's own parms are used.
+ * SIOCADDTUNNEL / SIOCCHGTUNNEL: need CAP_NET_ADMIN in the netns' user
+ * namespace. Normalise @p, find (or for ADD create) the
+ * tunnel and apply @p with ip_tunnel_update().
+ * SIOCDELTUNNEL: need CAP_NET_ADMIN. Unregister @dev, or on the fallback
+ * device the tunnel described by @p; the fallback tunnel
+ * itself cannot be deleted.
+ *
+ * Returns 0 on success, or -EPERM, -EEXIST, -EINVAL, -ENOBUFS or -ENOENT
+ * as set on the paths below. BUGs if the netns has no fallback device.
+ */
+int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+{
+ int err = 0;
+ struct ip_tunnel *t;
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
+
+ BUG_ON(!itn->fb_tunnel_dev);
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ t = NULL;
+ if (dev == itn->fb_tunnel_dev)
+ t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ if (t == NULL)
+ t = netdev_priv(dev);
+ memcpy(p, &t->parms, sizeof(*p));
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto done;
+ /* a fixed ttl forces the DF bit on */
+ if (p->iph.ttl)
+ p->iph.frag_off |= htons(IP_DF);
+ /* keys are only meaningful when TUNNEL_KEY is set */
+ if (!(p->i_flags&TUNNEL_KEY))
+ p->i_key = 0;
+ if (!(p->o_flags&TUNNEL_KEY))
+ p->o_key = 0;
+
+ t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+
+ if (!t && (cmd == SIOCADDTUNNEL))
+ t = ip_tunnel_create(net, itn, p);
+
+ if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ /* the requested parameters belong to another device */
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else {
+ unsigned int nflags = 0;
+
+ if (ipv4_is_multicast(p->iph.daddr))
+ nflags = IFF_BROADCAST;
+ else if (p->iph.daddr)
+ nflags = IFF_POINTOPOINT;
+
+ /* the change must not alter the device's
+ * point-to-point / broadcast nature
+ */
+ if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
+ err = -EINVAL;
+ break;
+ }
+
+ t = netdev_priv(dev);
+ }
+ }
+
+ if (t) {
+ err = 0;
+ ip_tunnel_update(itn, t, dev, p, true);
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ goto done;
+
+ if (dev == itn->fb_tunnel_dev) {
+ err = -ENOENT;
+ t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ if (t == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == netdev_priv(itn->fb_tunnel_dev))
+ goto done;
+ dev = t->dev;
+ }
+ unregister_netdevice(dev);
+ err = 0;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ return err;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
+
+/* Set dev->mtu to @new_mtu if it lies between 68 and 0xFFF8 minus the
+ * device's hard header length and the tunnel overhead (tunnel->hlen plus
+ * one IPv4 header). Returns 0 on success, -EINVAL when out of range.
+ */
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int overhead = tunnel->hlen + sizeof(struct iphdr);
+ int max_mtu = 0xFFF8 - dev->hard_header_len - overhead;
+
+ if (new_mtu >= 68 && new_mtu <= max_mtu) {
+ dev->mtu = new_mtu;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
+
+/* Device destructor (installed by ip_tunnel_init()): destroys the GRO
+ * cells, frees the per-cpu stats and then frees the net_device itself.
+ */
+static void ip_tunnel_dev_free(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ gro_cells_destroy(&tunnel->gro_cells);
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+/* Unhash the tunnel behind @dev and queue the device on @head for
+ * unregistration. The per-netns fallback device is left untouched.
+ */
+void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_net *itn;
+
+ itn = net_generic(dev_net(dev), tunnel->ip_tnl_net_id);
+
+ /* the fallback device is not removed through this path */
+ if (itn->fb_tunnel_dev == dev)
+ return;
+
+ ip_tunnel_del(tunnel);
+ unregister_netdevice_queue(dev, head);
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
+
+/* Per-netns setup: allocate the zeroed tunnel hash table and, when @ops
+ * is given, create the fallback tunnel device (named @devname if set)
+ * under the RTNL lock. With @ops NULL no fallback device is created.
+ * Returns 0 on success, -ENOMEM if the table cannot be allocated, or the
+ * error from __ip_tunnel_create(), in which case the table is freed.
+ * NOTE(review): on that last path itn->fb_tunnel_dev keeps the ERR_PTR
+ * value and itn->tunnels the freed pointer - confirm nothing reads them
+ * after a failed init.
+ */
+int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
+ struct rtnl_link_ops *ops, char *devname)
+{
+ struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
+ struct ip_tunnel_parm parms;
+
+ itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
+ if (!itn->tunnels)
+ return -ENOMEM;
+
+ if (!ops) {
+ itn->fb_tunnel_dev = NULL;
+ return 0;
+ }
+ memset(&parms, 0, sizeof(parms));
+ if (devname)
+ strlcpy(parms.name, devname, IFNAMSIZ);
+
+ rtnl_lock();
+ itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
+ rtnl_unlock();
+ if (IS_ERR(itn->fb_tunnel_dev)) {
+ kfree(itn->tunnels);
+ return PTR_ERR(itn->fb_tunnel_dev);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
+
+/* Queue every hashed tunnel device of @itn, and then the fallback device
+ * if there is one, on @head for unregistration. Nothing is unregistered
+ * here; the caller processes @head.
+ */
+static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
+{
+ int h;
+
+ for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+ struct hlist_node *n;
+ struct hlist_head *thead = &itn->tunnels[h];
+
+ hlist_for_each_entry_safe(t, n, thead, hash_node)
+ unregister_netdevice_queue(t->dev, head);
+ }
+ if (itn->fb_tunnel_dev)
+ unregister_netdevice_queue(itn->fb_tunnel_dev, head);
+}
+
+/* Per-netns teardown: under the RTNL lock, collect all tunnel devices of
+ * @itn and unregister them in one batch; then free the hash table.
+ */
+void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
+{
+ LIST_HEAD(list);
+
+ rtnl_lock();
+ ip_tunnel_destroy(itn, &list);
+ unregister_netdevice_many(&list);
+ rtnl_unlock();
+ kfree(itn->tunnels);
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
+
+/* rtnetlink "new link" helper: register @dev as a tunnel with parameters
+ * @p and add it to the per-netns hash table.
+ * @tb holds the netlink attributes; IFLA_ADDRESS and IFLA_MTU, when
+ * present, suppress the random MAC address and the computed MTU
+ * respectively.
+ * Returns 0 on success, -EEXIST if a tunnel with the same parameters and
+ * device type already exists, or the register_netdevice() error.
+ */
+int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
+ struct ip_tunnel_parm *p)
+{
+ struct ip_tunnel *nt;
+ struct net *net = dev_net(dev);
+ struct ip_tunnel_net *itn;
+ int mtu;
+ int err;
+
+ nt = netdev_priv(dev);
+ itn = net_generic(net, nt->ip_tnl_net_id);
+
+ if (ip_tunnel_find(itn, p, dev->type))
+ return -EEXIST;
+
+ nt->parms = *p;
+ err = register_netdevice(dev);
+ if (err)
+ goto out;
+
+ /* Ethernet-type tunnels get a random MAC unless one was supplied. */
+ if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+ eth_hw_addr_random(dev);
+
+ /* bind always runs for its side effects; the MTU is only applied
+ * when the request did not specify one
+ */
+ mtu = ip_tunnel_bind_dev(dev);
+ if (!tb[IFLA_MTU])
+ dev->mtu = mtu;
+
+ ip_tunnel_add(itn, nt);
+
+out:
+ return err;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
+
+/* rtnetlink "change link" helper: apply parameters @p to the tunnel
+ * behind @dev. The computed MTU is applied only when @tb carries no
+ * IFLA_MTU attribute.
+ * Returns 0 on success; -EINVAL for the fallback device, or when the
+ * change would alter the point-to-point / broadcast nature of a
+ * non-Ethernet device; -EEXIST if another device already uses @p.
+ */
+int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct ip_tunnel_parm *p)
+{
+ struct ip_tunnel *t, *nt;
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
+
+ if (dev == itn->fb_tunnel_dev)
+ return -EINVAL;
+
+ nt = netdev_priv(dev);
+
+ t = ip_tunnel_find(itn, p, dev->type);
+
+ if (t) {
+ /* the requested parameters belong to another device */
+ if (t->dev != dev)
+ return -EEXIST;
+ } else {
+ t = nt;
+
+ if (dev->type != ARPHRD_ETHER) {
+ unsigned int nflags = 0;
+
+ if (ipv4_is_multicast(p->iph.daddr))
+ nflags = IFF_BROADCAST;
+ else if (p->iph.daddr)
+ nflags = IFF_POINTOPOINT;
+
+ if ((dev->flags ^ nflags) &
+ (IFF_POINTOPOINT | IFF_BROADCAST))
+ return -EINVAL;
+ }
+ }
+
+ ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
+
+/* Common device init for tunnel devices: install the destructor,
+ * allocate the per-cpu stats and GRO cells, link the private ip_tunnel
+ * back to @dev, copy the device name into parms.name and preset the
+ * template header's version and ihl.
+ * Returns 0 on success, -ENOMEM if the per-cpu stats cannot be
+ * allocated, or the gro_cells_init() error (the stats are freed first).
+ */
+int ip_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct iphdr *iph = &tunnel->parms.iph;
+ int err;
+
+ dev->destructor = ip_tunnel_dev_free;
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ err = gro_cells_init(&tunnel->gro_cells, dev);
+ if (err) {
+ free_percpu(dev->tstats);
+ return err;
+ }
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+ iph->version = 4;
+ iph->ihl = 5;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_init);
+
+/* Device uninit hook: remove the tunnel behind @dev from the per-netns
+ * hash table, except for the fallback device.
+ */
+void ip_tunnel_uninit(struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_net *itn;
+
+ itn = net_generic(net, tunnel->ip_tnl_net_id);
+ /* fb_tunnel_dev will be unregistered in the net-exit call. */
+ if (itn->fb_tunnel_dev != dev)
+ ip_tunnel_del(netdev_priv(dev));
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
+
+/* Do the least required initialization; the rest of the init is done in
+ * the tunnel_init call. Only records @net_id in the device's private
+ * ip_tunnel, where the other helpers in this file read it back to find
+ * the per-netns tunnel state.
+ */
+void ip_tunnel_setup(struct net_device *dev, int net_id)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ tunnel->ip_tnl_net_id = net_id;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_setup);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index c3a4233c0ac2..c118f6b576bb 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -38,7 +38,7 @@
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
@@ -82,44 +82,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev);
} while (0)
-static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
-
- tot->multicast = dev->stats.multicast;
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_errors = dev->stats.rx_errors;
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- return tot;
-}
-
static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
__be32 remote, __be32 local)
{
@@ -399,8 +361,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
tunnel->err_count = 0;
}
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
nf_reset(skb);
@@ -597,7 +558,7 @@ static const struct net_device_ops vti_netdev_ops = {
.ndo_start_xmit = vti_tunnel_xmit,
.ndo_do_ioctl = vti_tunnel_ioctl,
.ndo_change_mtu = vti_tunnel_change_mtu,
- .ndo_get_stats64 = vti_get_stats64,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void vti_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f01d1b1aff7f..59cb8c769056 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->props.mode = x->props.mode;
t->props.saddr.a4 = x->props.saddr.a4;
t->props.flags = x->props.flags;
+ t->props.extra_flags = x->props.extra_flags;
memcpy(&t->mark, &x->mark, sizeof(t->mark));
if (xfrm_init_state(t))
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index bf6c5cf31aed..efa1138fa523 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -206,7 +206,7 @@ static int __init ic_open_devs(void)
struct ic_device *d, **last;
struct net_device *dev;
unsigned short oflags;
- unsigned long start;
+ unsigned long start, next_msg;
last = &ic_first_dev;
rtnl_lock();
@@ -263,12 +263,23 @@ static int __init ic_open_devs(void)
/* wait for a carrier on at least one device */
start = jiffies;
+ next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
+ int wait, elapsed;
+
for_each_netdev(&init_net, dev)
if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
goto have_carrier;
msleep(1);
+
+ if time_before(jiffies, next_msg)
+ continue;
+
+ elapsed = jiffies_to_msecs(jiffies - start);
+ wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000;
+ pr_info("Waiting up to %d more seconds for network.\n", wait);
+ next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
}
have_carrier:
rtnl_unlock();
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 8f024d41eefa..77bfcce64fe5 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -111,227 +111,21 @@
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#define HASH_SIZE 16
-#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
-
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static int ipip_net_id __read_mostly;
-struct ipip_net {
- struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_wc[1];
- struct ip_tunnel __rcu **tunnels[4];
-
- struct net_device *fb_tunnel_dev;
-};
static int ipip_tunnel_init(struct net_device *dev);
-static void ipip_tunnel_setup(struct net_device *dev);
-static void ipip_dev_free(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
-static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
- tot->collisions = dev->stats.collisions;
-
- return tot;
-}
-
-static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
- __be32 remote, __be32 local)
-{
- unsigned int h0 = HASH(remote);
- unsigned int h1 = HASH(local);
- struct ip_tunnel *t;
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
- for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
- if (local == t->parms.iph.saddr &&
- remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
- return t;
-
- for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
- if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
- return t;
-
- for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
- if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
- return t;
-
- t = rcu_dereference(ipn->tunnels_wc[0]);
- if (t && (t->dev->flags&IFF_UP))
- return t;
- return NULL;
-}
-
-static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
- struct ip_tunnel_parm *parms)
-{
- __be32 remote = parms->iph.daddr;
- __be32 local = parms->iph.saddr;
- unsigned int h = 0;
- int prio = 0;
-
- if (remote) {
- prio |= 2;
- h ^= HASH(remote);
- }
- if (local) {
- prio |= 1;
- h ^= HASH(local);
- }
- return &ipn->tunnels[prio][h];
-}
-
-static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
- struct ip_tunnel *t)
-{
- return __ipip_bucket(ipn, &t->parms);
-}
-
-static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
-{
- struct ip_tunnel __rcu **tp;
- struct ip_tunnel *iter;
-
- for (tp = ipip_bucket(ipn, t);
- (iter = rtnl_dereference(*tp)) != NULL;
- tp = &iter->next) {
- if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
- break;
- }
- }
-}
-
-static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
-{
- struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
-
- rcu_assign_pointer(t->next, rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
-}
-
-static int ipip_tunnel_create(struct net_device *dev)
-{
- struct ip_tunnel *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
- int err;
-
- err = ipip_tunnel_init(dev);
- if (err < 0)
- goto out;
-
- err = register_netdevice(dev);
- if (err < 0)
- goto out;
-
- strcpy(t->parms.name, dev->name);
- dev->rtnl_link_ops = &ipip_link_ops;
-
- dev_hold(dev);
- ipip_tunnel_link(ipn, t);
- return 0;
-
-out:
- return err;
-}
-
-static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
- struct ip_tunnel_parm *parms, int create)
-{
- __be32 remote = parms->iph.daddr;
- __be32 local = parms->iph.saddr;
- struct ip_tunnel *t, *nt;
- struct ip_tunnel __rcu **tp;
- struct net_device *dev;
- char name[IFNAMSIZ];
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
- for (tp = __ipip_bucket(ipn, parms);
- (t = rtnl_dereference(*tp)) != NULL;
- tp = &t->next) {
- if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
- return t;
- }
- if (!create)
- return NULL;
-
- if (parms->name[0])
- strlcpy(name, parms->name, IFNAMSIZ);
- else
- strcpy(name, "tunl%d");
-
- dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
- if (dev == NULL)
- return NULL;
-
- dev_net_set(dev, net);
-
- nt = netdev_priv(dev);
- nt->parms = *parms;
-
- if (ipip_tunnel_create(dev) < 0)
- goto failed_free;
-
- return nt;
-
-failed_free:
- ipip_dev_free(dev);
- return NULL;
-}
-
-/* called with RTNL */
-static void ipip_tunnel_uninit(struct net_device *dev)
-{
- struct net *net = dev_net(dev);
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
- if (dev == ipn->fb_tunnel_dev)
- RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
- else
- ipip_tunnel_unlink(ipn, netdev_priv(dev));
- dev_put(dev);
-}
-
static int ipip_err(struct sk_buff *skb, u32 info)
{
@@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)
8 bytes of packet payload. It means, that precise relaying of
ICMP in the real Internet is absolutely infeasible.
*/
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err;
-
- switch (type) {
- default:
- case ICMP_PARAMETERPROB:
- return 0;
-
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
- /* Impossible event. */
- return 0;
- default:
- /* All others are translated to HOST_UNREACH.
- rfc2003 contains "deep thoughts" about NET_UNREACH,
- I believe they are just ether pollution. --ANK
- */
- break;
- }
- break;
- case ICMP_TIME_EXCEEDED:
- if (code != ICMP_EXC_TTL)
- return 0;
- break;
- case ICMP_REDIRECT:
- break;
- }
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
err = -ENOENT;
- t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
if (t == NULL)
goto out;
@@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info)
else
t->err_count = 1;
t->err_time = jiffies;
-out:
+out:
return err;
}
+static const struct tnl_ptk_info tpi = {
+ /* no tunnel info required for ipip. */
+ .proto = htons(ETH_P_IP),
+};
+
static int ipip_rcv(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
- int err;
-
- tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
- if (tunnel != NULL) {
- struct pcpu_tstats *tstats;
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->saddr, iph->daddr, 0);
+ if (tunnel) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
-
- secpath_reset(skb);
-
- skb->mac_header = skb->network_header;
- skb_reset_network_header(skb);
- skb->protocol = htons(ETH_P_IP);
- skb->pkt_type = PACKET_HOST;
-
- __skb_tunnel_rx(skb, tunnel->dev);
-
- err = IP_ECN_decapsulate(iph, skb);
- if (unlikely(err)) {
- if (log_ecn_error)
- net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
- &iph->saddr, iph->tos);
- if (err > 1) {
- ++tunnel->dev->stats.rx_frame_errors;
- ++tunnel->dev->stats.rx_errors;
- goto drop;
- }
- }
-
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- netif_rx(skb);
- return 0;
+ return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
}
return -1;
@@ -463,329 +209,64 @@ drop:
* This function assumes it is being called from dev_queue_xmit()
* and that skb is filled properly by that function.
*/
-
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
- u8 tos = tunnel->parms.iph.tos;
- __be16 df = tiph->frag_off;
- struct rtable *rt; /* Route to the other host */
- struct net_device *tdev; /* Device to other host */
- const struct iphdr *old_iph;
- struct iphdr *iph; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
- __be32 dst = tiph->daddr;
- struct flowi4 fl4;
- int mtu;
-
- if (skb->protocol != htons(ETH_P_IP))
- goto tx_error;
- if (skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb))
+ if (unlikely(skb->protocol != htons(ETH_P_IP)))
goto tx_error;
- old_iph = ip_hdr(skb);
-
- if (tos & 1)
- tos = old_iph->tos;
-
- if (!dst) {
- /* NBMA tunnel */
- if ((rt = skb_rtable(skb)) == NULL) {
- dev->stats.tx_fifo_errors++;
- goto tx_error;
- }
- dst = rt_nexthop(rt, old_iph->daddr);
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
}
- rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
- dst, tiph->saddr,
- 0, 0,
- IPPROTO_IPIP, RT_TOS(tos),
- tunnel->parms.link);
- if (IS_ERR(rt)) {
- dev->stats.tx_carrier_errors++;
- goto tx_error_icmp;
- }
- tdev = rt->dst.dev;
-
- if (tdev == dev) {
- ip_rt_put(rt);
- dev->stats.collisions++;
- goto tx_error;
- }
-
- df |= old_iph->frag_off & htons(IP_DF);
-
- if (df) {
- mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
-
- if (mtu < 68) {
- dev->stats.collisions++;
- ip_rt_put(rt);
- goto tx_error;
- }
-
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
- if ((old_iph->frag_off & htons(IP_DF)) &&
- mtu < ntohs(old_iph->tot_len)) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
- ip_rt_put(rt);
- goto tx_error;
- }
- }
-
- if (tunnel->err_count > 0) {
- if (time_before(jiffies,
- tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
- tunnel->err_count--;
- dst_link_failure(skb);
- } else
- tunnel->err_count = 0;
- }
-
- /*
- * Okay, now see if we can stuff it in the buffer as-is.
- */
- max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
-
- if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
- (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
- struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
- if (!new_skb) {
- ip_rt_put(rt);
- dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
- }
- if (skb->sk)
- skb_set_owner_w(new_skb, skb->sk);
- dev_kfree_skb(skb);
- skb = new_skb;
- old_iph = ip_hdr(skb);
- }
-
- skb->transport_header = skb->network_header;
- skb_push(skb, sizeof(struct iphdr));
- skb_reset_network_header(skb);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
- /*
- * Push down and install the IPIP header.
- */
-
- iph = ip_hdr(skb);
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr)>>2;
- iph->frag_off = df;
- iph->protocol = IPPROTO_IPIP;
- iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
- iph->daddr = fl4.daddr;
- iph->saddr = fl4.saddr;
-
- if ((iph->ttl = tiph->ttl) == 0)
- iph->ttl = old_iph->ttl;
-
- iptunnel_xmit(skb, dev);
+ ip_tunnel_xmit(skb, dev, tiph);
return NETDEV_TX_OK;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
dev->stats.tx_errors++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
-static void ipip_tunnel_bind_dev(struct net_device *dev)
-{
- struct net_device *tdev = NULL;
- struct ip_tunnel *tunnel;
- const struct iphdr *iph;
-
- tunnel = netdev_priv(dev);
- iph = &tunnel->parms.iph;
-
- if (iph->daddr) {
- struct rtable *rt;
- struct flowi4 fl4;
-
- rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
- iph->daddr, iph->saddr,
- 0, 0,
- IPPROTO_IPIP,
- RT_TOS(iph->tos),
- tunnel->parms.link);
- if (!IS_ERR(rt)) {
- tdev = rt->dst.dev;
- ip_rt_put(rt);
- }
- dev->flags |= IFF_POINTOPOINT;
- }
-
- if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
-
- if (tdev) {
- dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
- dev->mtu = tdev->mtu - sizeof(struct iphdr);
- }
- dev->iflink = tunnel->parms.link;
-}
-
-static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
-{
- struct net *net = dev_net(t->dev);
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
- ipip_tunnel_unlink(ipn, t);
- synchronize_net();
- t->parms.iph.saddr = p->iph.saddr;
- t->parms.iph.daddr = p->iph.daddr;
- memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
- memcpy(t->dev->broadcast, &p->iph.daddr, 4);
- ipip_tunnel_link(ipn, t);
- t->parms.iph.ttl = p->iph.ttl;
- t->parms.iph.tos = p->iph.tos;
- t->parms.iph.frag_off = p->iph.frag_off;
- if (t->parms.link != p->link) {
- t->parms.link = p->link;
- ipip_tunnel_bind_dev(t->dev);
- }
- netdev_state_change(t->dev);
-}
-
static int
-ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip_tunnel_parm p;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
- switch (cmd) {
- case SIOCGETTUNNEL:
- t = NULL;
- if (dev == ipn->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
- err = -EFAULT;
- break;
- }
- t = ipip_tunnel_locate(net, &p, 0);
- }
- if (t == NULL)
- t = netdev_priv(dev);
- memcpy(&p, &t->parms, sizeof(p));
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- err = -EFAULT;
- break;
-
- case SIOCADDTUNNEL:
- case SIOCCHGTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
-
- err = -EINVAL;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
- goto done;
- if (p.iph.ttl)
- p.iph.frag_off |= htons(IP_DF);
-
- t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
-
- if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
- if (t != NULL) {
- if (t->dev != dev) {
- err = -EEXIST;
- break;
- }
- } else {
- if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
- (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
- err = -EINVAL;
- break;
- }
- t = netdev_priv(dev);
- }
-
- ipip_tunnel_update(t, &p);
- }
-
- if (t) {
- err = 0;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
- err = -EFAULT;
- } else
- err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
- break;
-
- case SIOCDELTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- if (dev == ipn->fb_tunnel_dev) {
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
- err = -ENOENT;
- if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
- goto done;
- err = -EPERM;
- if (t->dev == ipn->fb_tunnel_dev)
- goto done;
- dev = t->dev;
- }
- unregister_netdevice(dev);
- err = 0;
- break;
- default:
- err = -EINVAL;
- }
-
-done:
- return err;
-}
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ return -EFAULT;
-static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+ return -EINVAL;
+ if (p.i_key || p.o_key || p.i_flags || p.o_flags)
return -EINVAL;
- dev->mtu = new_mtu;
+ if (p.iph.ttl)
+ p.iph.frag_off |= htons(IP_DF);
+
+ err = ip_tunnel_ioctl(dev, &p, cmd);
+ if (err)
+ return err;
+
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ return -EFAULT;
+
return 0;
}
static const struct net_device_ops ipip_netdev_ops = {
- .ndo_uninit = ipip_tunnel_uninit,
+ .ndo_init = ipip_tunnel_init,
+ .ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = ipip_tunnel_xmit,
.ndo_do_ioctl = ipip_tunnel_ioctl,
- .ndo_change_mtu = ipip_tunnel_change_mtu,
- .ndo_get_stats64 = ipip_get_stats64,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
-static void ipip_dev_free(struct net_device *dev)
-{
- free_percpu(dev->tstats);
- free_netdev(dev);
-}
-
#define IPIP_FEATURES (NETIF_F_SG | \
NETIF_F_FRAGLIST | \
NETIF_F_HIGHDMA | \
@@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev)
static void ipip_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip_netdev_ops;
- dev->destructor = ipip_dev_free;
dev->type = ARPHRD_TUNNEL;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
@@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->features |= IPIP_FEATURES;
dev->hw_features |= IPIP_FEATURES;
+ ip_tunnel_setup(dev, ipip_net_id);
}
static int ipip_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- tunnel->dev = dev;
-
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
- ipip_tunnel_bind_dev(dev);
-
- dev->tstats = alloc_percpu(struct pcpu_tstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- return 0;
-}
-
-static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct iphdr *iph = &tunnel->parms.iph;
- struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
-
- tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
-
- iph->version = 4;
- iph->protocol = IPPROTO_IPIP;
- iph->ihl = 5;
-
- dev->tstats = alloc_percpu(struct pcpu_tstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- dev_hold(dev);
- rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
- return 0;
+ tunnel->hlen = 0;
+ tunnel->parms.iph.protocol = IPPROTO_IPIP;
+ return ip_tunnel_init(dev);
}
static void ipip_netlink_parms(struct nlattr *data[],
@@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[],
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
- struct net *net = dev_net(dev);
- struct ip_tunnel *nt;
-
- nt = netdev_priv(dev);
- ipip_netlink_parms(data, &nt->parms);
-
- if (ipip_tunnel_locate(net, &nt->parms, 0))
- return -EEXIST;
+ struct ip_tunnel_parm p;
- return ipip_tunnel_create(dev);
+ ipip_netlink_parms(data, &p);
+ return ip_tunnel_newlink(dev, tb, &p);
}
static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip_tunnel *t;
struct ip_tunnel_parm p;
- struct net *net = dev_net(dev);
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
- if (dev == ipn->fb_tunnel_dev)
- return -EINVAL;
ipip_netlink_parms(data, &p);
@@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
return -EINVAL;
- t = ipip_tunnel_locate(net, &p, 0);
-
- if (t) {
- if (t->dev != dev)
- return -EEXIST;
- } else
- t = netdev_priv(dev);
-
- ipip_tunnel_update(t, &p);
- return 0;
+ return ip_tunnel_changelink(dev, tb, &p);
}
static size_t ipip_get_size(const struct net_device *dev)
@@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
.setup = ipip_tunnel_setup,
.newlink = ipip_newlink,
.changelink = ipip_changelink,
+ .dellink = ip_tunnel_dellink,
.get_size = ipip_get_size,
.fill_info = ipip_fill_info,
};
@@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 1,
};
-static const char banner[] __initconst =
- KERN_INFO "IPv4 over IPv4 tunneling driver\n";
-
-static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
-{
- int prio;
-
- for (prio = 1; prio < 4; prio++) {
- int h;
- for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t;
-
- t = rtnl_dereference(ipn->tunnels[prio][h]);
- while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
- t = rtnl_dereference(t->next);
- }
- }
- }
-}
-
static int __net_init ipip_init_net(struct net *net)
{
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
- struct ip_tunnel *t;
- int err;
-
- ipn->tunnels[0] = ipn->tunnels_wc;
- ipn->tunnels[1] = ipn->tunnels_l;
- ipn->tunnels[2] = ipn->tunnels_r;
- ipn->tunnels[3] = ipn->tunnels_r_l;
-
- ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
- "tunl0",
- ipip_tunnel_setup);
- if (!ipn->fb_tunnel_dev) {
- err = -ENOMEM;
- goto err_alloc_dev;
- }
- dev_net_set(ipn->fb_tunnel_dev, net);
-
- err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
- if (err)
- goto err_reg_dev;
-
- if ((err = register_netdev(ipn->fb_tunnel_dev)))
- goto err_reg_dev;
-
- t = netdev_priv(ipn->fb_tunnel_dev);
-
- strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
- return 0;
-
-err_reg_dev:
- ipip_dev_free(ipn->fb_tunnel_dev);
-err_alloc_dev:
- /* nothing */
- return err;
+ return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
static void __net_exit ipip_exit_net(struct net *net)
{
- struct ipip_net *ipn = net_generic(net, ipip_net_id);
- LIST_HEAD(list);
-
- rtnl_lock();
- ipip_destroy_tunnels(ipn, &list);
- unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+ ip_tunnel_delete_net(itn);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
.exit = ipip_exit_net,
.id = &ipip_net_id,
- .size = sizeof(struct ipip_net),
+ .size = sizeof(struct ip_tunnel_net),
};
static int __init ipip_init(void)
{
int err;
- printk(banner);
+ pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
err = register_pernet_device(&ipip_net_ops);
if (err < 0)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f95b3aa579e..9d9610ae7855 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -61,7 +61,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
@@ -626,9 +626,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
nlh->nlmsg_type = NLMSG_ERROR;
- nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
- e = NLMSG_DATA(nlh);
+ e = nlmsg_data(nlh);
e->error = -ETIMEDOUT;
memset(&e->msg, 0, sizeof(e->msg));
@@ -910,14 +910,14 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+ if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
- nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
- e = NLMSG_DATA(nlh);
+ e = nlmsg_data(nlh);
e->error = -EMSGSIZE;
memset(&e->msg, 0, sizeof(e->msg));
}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4c0cf63dd92e..c3e0adea9c27 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,4 +1,9 @@
-/* IPv4 specific functions of netfilter core */
+/*
+ * IPv4 specific functions of netfilter core
+ *
+ * Rusty Russell (C) 2000 -- This code is GPL.
+ * Patrick McHardy (C) 2006-2012
+ */
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
@@ -40,14 +45,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
fl4.flowi4_flags = flags;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
- return -1;
+ return PTR_ERR(rt);
/* Drop old route. */
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
if (skb_dst(skb)->error)
- return -1;
+ return skb_dst(skb)->error;
#ifdef CONFIG_XFRM
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -56,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
if (IS_ERR(dst))
- return -1;
+ return PTR_ERR(dst);;
skb_dst_set(skb, dst);
}
#endif
@@ -66,7 +71,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
if (skb_headroom(skb) < hh_len &&
pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
0, GFP_ATOMIC))
- return -1;
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0d755c50994b..e7916c193932 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -71,7 +71,7 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED
+ depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7dc6a9743592..85a4f21aac1a 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -6,6 +6,7 @@
* Some ARP specific bits are:
*
* Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 79ca5e70d497..eadab1ed6500 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
net->ipv4.arptable_filter =
arpt_register_table(net, &packet_filter, repl);
kfree(repl);
- if (IS_ERR(net->ipv4.arptable_filter))
- return PTR_ERR(net->ipv4.arptable_filter);
- return 0;
+ return PTR_RET(net->ipv4.arptable_filter);
}
static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3efcf87400c3..d23118d95ff9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -182,8 +183,7 @@ ipt_get_target_c(const struct ipt_entry *e)
return ipt_get_target((struct ipt_entry *)e);
}
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
static const char *const hooknames[] = {
[NF_INET_PRE_ROUTING] = "PREROUTING",
[NF_INET_LOCAL_IN] = "INPUT",
@@ -259,6 +259,7 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ipt_entry *iter;
unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
@@ -271,7 +272,7 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
+ nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
"TRACE: %s:%s:%s:%u ",
tablename, chainname, comment, rulenum);
}
@@ -361,8 +362,7 @@ ipt_do_table(struct sk_buff *skb,
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
trace_packet(skb, hook, in, out,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 5852b249054f..0b732efd32e2 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -105,7 +105,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
- remove_proc_entry(c->pde->name, c->pde->parent);
+ proc_remove(c->pde);
#endif
return;
}
@@ -631,7 +631,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- struct clusterip_config *c = PDE(inode)->data;
+ struct clusterip_config *c = PDE_DATA(inode);
sf->private = c;
@@ -643,7 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
- struct clusterip_config *c = PDE(inode)->data;
+ struct clusterip_config *c = PDE_DATA(inode);
int ret;
ret = seq_release(inode, file);
@@ -657,7 +657,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *ofs)
{
- struct clusterip_config *c = PDE(file_inode(file))->data;
+ struct clusterip_config *c = PDE_DATA(file_inode(file));
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
unsigned long nodenum;
@@ -736,7 +736,7 @@ static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
#ifdef CONFIG_PROC_FS
- remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
+ proc_remove(clusterip_procdir);
#endif
nf_unregister_hook(&cip_arp_ops);
xt_unregister_target(&clusterip_tg_reg);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 7d168dcbd135..32b0e978c8e0 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -4,6 +4,7 @@
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -37,7 +38,7 @@
#include <linux/skbuff.h>
#include <linux/kernel.h>
#include <linux/timer.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netdevice.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
@@ -45,6 +46,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <linux/bitops.h>
#include <asm/unaligned.h>
@@ -78,15 +80,23 @@ typedef struct {
struct timer_list timer; /* the timer function */
} ulog_buff_t;
-static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
+static int ulog_net_id __read_mostly;
+struct ulog_net {
+ unsigned int nlgroup[ULOG_MAXNLGROUPS];
+ ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
+ struct sock *nflognl;
+ spinlock_t lock;
+};
-static struct sock *nflognl; /* our socket */
-static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
+static struct ulog_net *ulog_pernet(struct net *net)
+{
+ return net_generic(net, ulog_net_id);
+}
/* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroupnum)
+static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
{
- ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+ ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
pr_debug("ulog_send: timer is deleting\n");
del_timer(&ub->timer);
@@ -103,7 +113,8 @@ static void ulog_send(unsigned int nlgroupnum)
NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
pr_debug("throwing %d packets to netlink group %u\n",
ub->qlen, nlgroupnum + 1);
- netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
+ netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
+ GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
@@ -114,13 +125,17 @@ static void ulog_send(unsigned int nlgroupnum)
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
+ unsigned int groupnum = *((unsigned int *)data);
+ struct ulog_net *ulog = container_of((void *)data,
+ struct ulog_net,
+ nlgroup[groupnum]);
pr_debug("timer function called, calling ulog_send\n");
/* lock to protect against somebody modifying our structure
* from ipt_ulog_target at the same time */
- spin_lock_bh(&ulog_lock);
- ulog_send(data);
- spin_unlock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
+ ulog_send(ulog, groupnum);
+ spin_unlock_bh(&ulog->lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -148,7 +163,8 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
return skb;
}
-static void ipt_ulog_packet(unsigned int hooknum,
+static void ipt_ulog_packet(struct net *net,
+ unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -160,6 +176,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
size_t size, copy_len;
struct nlmsghdr *nlh;
struct timeval tv;
+ struct ulog_net *ulog = ulog_pernet(net);
/* ffs == find first bit set, necessary because userspace
* is already shifting groupnumber, but we need unshifted.
@@ -172,11 +189,11 @@ static void ipt_ulog_packet(unsigned int hooknum,
else
copy_len = loginfo->copy_range;
- size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+ size = nlmsg_total_size(sizeof(*pm) + copy_len);
- ub = &ulog_buffers[groupnum];
+ ub = &ulog->ulog_buffers[groupnum];
- spin_lock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
if (!ub->skb) {
if (!(ub->skb = ulog_alloc_skb(size)))
@@ -186,7 +203,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
/* either the queue len is too high or we don't have
* enough room in nlskb left. send it to userspace. */
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
if (!(ub->skb = ulog_alloc_skb(size)))
goto alloc_failure;
@@ -215,8 +232,10 @@ static void ipt_ulog_packet(unsigned int hooknum,
put_unaligned(tv.tv_usec, &pm->timestamp_usec);
put_unaligned(skb->mark, &pm->mark);
pm->hook = hooknum;
- if (prefix != NULL)
- strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+ if (prefix != NULL) {
+ strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
+ pm->prefix[sizeof(pm->prefix) - 1] = '\0';
+ }
else if (loginfo->prefix[0] != '\0')
strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
else
@@ -260,27 +279,30 @@ static void ipt_ulog_packet(unsigned int hooknum,
if (ub->qlen >= loginfo->qthreshold) {
if (loginfo->qthreshold > 1)
nlh->nlmsg_type = NLMSG_DONE;
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
}
out_unlock:
- spin_unlock_bh(&ulog_lock);
+ spin_unlock_bh(&ulog->lock);
return;
alloc_failure:
pr_debug("Error building netlink message\n");
- spin_unlock_bh(&ulog_lock);
+ spin_unlock_bh(&ulog->lock);
}
static unsigned int
ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ struct net *net = dev_net(par->in ? par->in : par->out);
+
+ ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
par->targinfo, NULL);
return XT_CONTINUE;
}
-static void ipt_logfn(u_int8_t pf,
+static void ipt_logfn(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -302,7 +324,7 @@ static void ipt_logfn(u_int8_t pf,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
static int ulog_tg_check(const struct xt_tgchk_param *par)
@@ -376,54 +398,46 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {
.me = THIS_MODULE,
};
-static int __init ulog_tg_init(void)
+static int __net_init ulog_tg_net_init(struct net *net)
{
- int ret, i;
+ int i;
+ struct ulog_net *ulog = ulog_pernet(net);
struct netlink_kernel_cfg cfg = {
.groups = ULOG_MAXNLGROUPS,
};
- pr_debug("init module\n");
-
- if (nlbufsiz > 128*1024) {
- pr_warning("Netlink buffer has to be <= 128kB\n");
- return -EINVAL;
- }
-
+ spin_lock_init(&ulog->lock);
/* initialize ulog_buffers */
- for (i = 0; i < ULOG_MAXNLGROUPS; i++)
- setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+ ulog->nlgroup[i] = i;
+ setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
+ (unsigned long)&ulog->nlgroup[i]);
+ }
- nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
- if (!nflognl)
+ ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+ if (!ulog->nflognl)
return -ENOMEM;
- ret = xt_register_target(&ulog_tg_reg);
- if (ret < 0) {
- netlink_kernel_release(nflognl);
- return ret;
- }
if (nflog)
- nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+ nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
return 0;
}
-static void __exit ulog_tg_exit(void)
+static void __net_exit ulog_tg_net_exit(struct net *net)
{
ulog_buff_t *ub;
int i;
-
- pr_debug("cleanup_module\n");
+ struct ulog_net *ulog = ulog_pernet(net);
if (nflog)
- nf_log_unregister(&ipt_ulog_logger);
- xt_unregister_target(&ulog_tg_reg);
- netlink_kernel_release(nflognl);
+ nf_log_unset(net, &ipt_ulog_logger);
+
+ netlink_kernel_release(ulog->nflognl);
/* remove pending timers and free allocated skb's */
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- ub = &ulog_buffers[i];
+ ub = &ulog->ulog_buffers[i];
pr_debug("timer is deleting\n");
del_timer(&ub->timer);
@@ -434,5 +448,50 @@ static void __exit ulog_tg_exit(void)
}
}
+static struct pernet_operations ulog_tg_net_ops = {
+ .init = ulog_tg_net_init,
+ .exit = ulog_tg_net_exit,
+ .id = &ulog_net_id,
+ .size = sizeof(struct ulog_net),
+};
+
+static int __init ulog_tg_init(void)
+{
+ int ret;
+ pr_debug("init module\n");
+
+ if (nlbufsiz > 128*1024) {
+ pr_warn("Netlink buffer has to be <= 128kB\n");
+ return -EINVAL;
+ }
+
+ ret = register_pernet_subsys(&ulog_tg_net_ops);
+ if (ret)
+ goto out_pernet;
+
+ ret = xt_register_target(&ulog_tg_reg);
+ if (ret < 0)
+ goto out_target;
+
+ if (nflog)
+ nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+
+ return 0;
+
+out_target:
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+out_pernet:
+ return ret;
+}
+
+static void __exit ulog_tg_exit(void)
+{
+ pr_debug("cleanup_module\n");
+ if (nflog)
+ nf_log_unregister(&ipt_ulog_logger);
+ xt_unregister_target(&ulog_tg_reg);
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+}
+
module_init(ulog_tg_init);
module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 85d88f206447..cba5658ec82c 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -44,6 +44,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
u_int8_t tos;
__be32 saddr, daddr;
u_int32_t mark;
+ int err;
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
@@ -66,9 +67,11 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
if (iph->saddr != saddr ||
iph->daddr != daddr ||
skb->mark != mark ||
- iph->tos != tos)
- if (ip_route_me_harder(skb, RTN_UNSPEC))
- ret = NF_DROP;
+ iph->tos != tos) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
}
return ret;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index eeaff7e4acb5..6383273d54e1 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -176,6 +176,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
+ int err;
#endif
unsigned int ret;
@@ -195,9 +196,11 @@ nf_nat_ipv4_out(unsigned int hooknum,
ct->tuplehash[!dir].tuple.dst.u3.ip) ||
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all))
- if (nf_xfrm_me_harder(skb, AF_INET) < 0)
- ret = NF_DROP;
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
}
#endif
return ret;
@@ -213,6 +216,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int ret;
+ int err;
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
@@ -226,16 +230,19 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
- if (ip_route_me_harder(skb, RTN_UNSPEC))
- ret = NF_DROP;
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
}
#ifdef CONFIG_XFRM
else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (nf_xfrm_me_harder(skb, AF_INET) < 0)
- ret = NF_DROP;
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
#endif
}
return ret;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2820aa18b542..567d84168bd2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -1,6 +1,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f2ca12794081..4c48e434bb1f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -2,6 +2,7 @@
*
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5241d997ab75..a338dad41b7d 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -1,5 +1,6 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -187,8 +188,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: short packet ");
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
+ NULL, "nf_ct_icmp: short packet ");
return -NF_ACCEPT;
}
@@ -196,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
}
@@ -209,7 +210,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
*/
if (icmph->type > NR_ICMP_TYPES) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9c3db10b22d3..9eea059dd621 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -2,6 +2,7 @@
* H.323 extension for NAT alteration.
*
* Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This source code is licensed under General Public License version 2.
*
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index a06d7d74817d..657d2307f031 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -13,6 +13,8 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
* TODO: - NAT to a unique tuple, not to TCP source port
* (needs netfilter tuple reservation)
*/
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index ea44f02563b5..690d890111bb 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -21,6 +21,8 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
*/
#include <linux/module.h>
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index bac712293fd6..5f011cc89cd9 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,6 +38,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2e91006d6076..7d93d62cd5fd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.opt = NULL;
ipc.oif = sk->sk_bound_dev_if;
ipc.tx_flags = 0;
- err = sock_tx_timestamp(sk, &ipc.tx_flags);
- if (err)
- return err;
+
+ sock_tx_timestamp(sk, &ipc.tx_flags);
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 32030a24e776..2a5bf86d2415 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -125,6 +125,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+ SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -162,6 +163,7 @@ static const struct snmp_mib snmp4_tcp_list[] = {
SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS),
SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS),
+ SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -172,6 +174,7 @@ static const struct snmp_mib snmp4_udp_list[] = {
SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
+ SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -224,6 +227,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
+ SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
+ SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
@@ -267,6 +272,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
+ SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
SNMP_MIB_SENTINEL
};
@@ -319,15 +325,16 @@ static void icmp_put(struct seq_file *seq)
struct net *net = seq->private;
atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
- seq_puts(seq, "\nIcmp: InMsgs InErrors");
+ seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name);
seq_printf(seq, " OutMsgs OutErrors");
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
- seq_printf(seq, "\nIcmp: %lu %lu",
+ seq_printf(seq, "\nIcmp: %lu %lu %lu",
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
- snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
+ snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
+ snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + icmpmibmap[i].index));
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6e2851464f8f..d35bbf0cf404 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -737,10 +737,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
{
struct rtable *rt;
struct flowi4 fl4;
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ int oif = skb->dev->ifindex;
+ u8 tos = RT_TOS(iph->tos);
+ u8 prot = iph->protocol;
+ u32 mark = skb->mark;
rt = (struct rtable *) dst;
- ip_rt_build_flow_key(&fl4, sk, skb);
+ __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -2311,7 +2316,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 397e0f69435f..b05c96e7af8b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt)
{
struct tcp_options_received tcp_opt;
- const u8 *hash_location;
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct tcp_sock *tp = tcp_sk(sk);
@@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
+ tcp_parse_options(skb, &tcp_opt, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 960fd29d9b8e..fa2f63fc453b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,7 +28,7 @@
static int zero;
static int one = 1;
-static int two = 2;
+static int four = 4;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -592,13 +592,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_frto_response",
- .data = &sysctl_tcp_frto_response,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
.maxlen = sizeof(int),
@@ -733,13 +726,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "tcp_cookie_size",
- .data = &sysctl_tcp_cookie_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_thin_linear_timeouts",
.data = &sysctl_tcp_thin_linear_timeouts,
.maxlen = sizeof(int),
@@ -760,7 +746,7 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
- .extra2 = &two,
+ .extra2 = &four,
},
{
.procname = "udp_mem",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e22020790709..ab450c099aa4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- /* TCP Cookie Transactions */
- if (sysctl_tcp_cookie_size > 0) {
- /* Default, cookies without s_data_payload. */
- tp->cookie_values =
- kzalloc(sizeof(*tp->cookie_values),
- sk->sk_allocation);
- if (tp->cookie_values != NULL)
- kref_init(&tp->cookie_values->kref);
- }
/* Presumed zeroed, in order of appearance:
* cookie_in_always, cookie_out_never,
* s_data_constant, s_data_in, s_data_out
@@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
release_sock(sk);
return err;
}
- case TCP_COOKIE_TRANSACTIONS: {
- struct tcp_cookie_transactions ctd;
- struct tcp_cookie_values *cvp = NULL;
-
- if (sizeof(ctd) > optlen)
- return -EINVAL;
- if (copy_from_user(&ctd, optval, sizeof(ctd)))
- return -EFAULT;
-
- if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
- ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
- return -EINVAL;
-
- if (ctd.tcpct_cookie_desired == 0) {
- /* default to global value */
- } else if ((0x1 & ctd.tcpct_cookie_desired) ||
- ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
- ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
- return -EINVAL;
- }
-
- if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
- /* Supercedes all other values */
- lock_sock(sk);
- if (tp->cookie_values != NULL) {
- kref_put(&tp->cookie_values->kref,
- tcp_cookie_values_release);
- tp->cookie_values = NULL;
- }
- tp->rx_opt.cookie_in_always = 0; /* false */
- tp->rx_opt.cookie_out_never = 1; /* true */
- release_sock(sk);
- return err;
- }
-
- /* Allocate ancillary memory before locking.
- */
- if (ctd.tcpct_used > 0 ||
- (tp->cookie_values == NULL &&
- (sysctl_tcp_cookie_size > 0 ||
- ctd.tcpct_cookie_desired > 0 ||
- ctd.tcpct_s_data_desired > 0))) {
- cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
- GFP_KERNEL);
- if (cvp == NULL)
- return -ENOMEM;
-
- kref_init(&cvp->kref);
- }
- lock_sock(sk);
- tp->rx_opt.cookie_in_always =
- (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
- tp->rx_opt.cookie_out_never = 0; /* false */
-
- if (tp->cookie_values != NULL) {
- if (cvp != NULL) {
- /* Changed values are recorded by a changed
- * pointer, ensuring the cookie will differ,
- * without separately hashing each value later.
- */
- kref_put(&tp->cookie_values->kref,
- tcp_cookie_values_release);
- } else {
- cvp = tp->cookie_values;
- }
- }
-
- if (cvp != NULL) {
- cvp->cookie_desired = ctd.tcpct_cookie_desired;
-
- if (ctd.tcpct_used > 0) {
- memcpy(cvp->s_data_payload, ctd.tcpct_value,
- ctd.tcpct_used);
- cvp->s_data_desired = ctd.tcpct_used;
- cvp->s_data_constant = 1; /* true */
- } else {
- /* No constant payload data. */
- cvp->s_data_desired = ctd.tcpct_s_data_desired;
- cvp->s_data_constant = 0; /* false */
- }
-
- tp->cookie_values = cvp;
- }
- release_sock(sk);
- return err;
- }
default:
/* fallthru */
break;
@@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
return 0;
- case TCP_COOKIE_TRANSACTIONS: {
- struct tcp_cookie_transactions ctd;
- struct tcp_cookie_values *cvp = tp->cookie_values;
-
- if (get_user(len, optlen))
- return -EFAULT;
- if (len < sizeof(ctd))
- return -EINVAL;
-
- memset(&ctd, 0, sizeof(ctd));
- ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
- TCP_COOKIE_IN_ALWAYS : 0)
- | (tp->rx_opt.cookie_out_never ?
- TCP_COOKIE_OUT_NEVER : 0);
-
- if (cvp != NULL) {
- ctd.tcpct_flags |= (cvp->s_data_in ?
- TCP_S_DATA_IN : 0)
- | (cvp->s_data_out ?
- TCP_S_DATA_OUT : 0);
-
- ctd.tcpct_cookie_desired = cvp->cookie_desired;
- ctd.tcpct_s_data_desired = cvp->s_data_desired;
-
- memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
- cvp->cookie_pair_size);
- ctd.tcpct_used = cvp->cookie_pair_size;
- }
-
- if (put_user(sizeof(ctd), optlen))
- return -EFAULT;
- if (copy_to_user(optval, &ctd, sizeof(ctd)))
- return -EFAULT;
- return 0;
- }
case TCP_THIN_LINEAR_TIMEOUTS:
val = tp->thin_lto;
break;
@@ -3015,6 +2885,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
__be32 delta;
unsigned int oldlen;
unsigned int mss;
+ struct sk_buff *gso_skb = skb;
+ __sum16 newcheck;
+ bool ooo_okay, copy_destructor;
if (!pskb_may_pull(skb, sizeof(*th)))
goto out;
@@ -3044,6 +2917,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
SKB_GSO_GRE |
+ SKB_GSO_UDP_TUNNEL |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
@@ -3054,27 +2928,48 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
goto out;
}
+ copy_destructor = gso_skb->destructor == tcp_wfree;
+ ooo_okay = gso_skb->ooo_okay;
+ /* All segments but the first should have ooo_okay cleared */
+ skb->ooo_okay = 0;
+
segs = skb_segment(skb, features);
if (IS_ERR(segs))
goto out;
+ /* Only first segment might have ooo_okay set */
+ segs->ooo_okay = ooo_okay;
+
delta = htonl(oldlen + (thlen + mss));
skb = segs;
th = tcp_hdr(skb);
seq = ntohl(th->seq);
+ newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
+ (__force u32)delta));
+
do {
th->fin = th->psh = 0;
+ th->check = newcheck;
- th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
th->check =
csum_fold(csum_partial(skb_transport_header(skb),
thlen, skb->csum));
seq += mss;
+ if (copy_destructor) {
+ skb->destructor = gso_skb->destructor;
+ skb->sk = gso_skb->sk;
+ /* {tcp|sock}_wfree() use exact truesize accounting :
+ * sum(skb->truesize) MUST be exactly be gso_skb->truesize
+ * So we account mss bytes of 'true size' for each segment.
+ * The last segment will contain the remaining.
+ */
+ skb->truesize = mss;
+ gso_skb->truesize -= mss;
+ }
skb = skb->next;
th = tcp_hdr(skb);
@@ -3082,6 +2977,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
th->cwr = 0;
} while (skb->next);
+ /* Following permits TCP Small Queues to work well with GSO :
+ * The callback to TCP stack will be called at the time last frag
+ * is freed at TX completion, and not right now when gso_skb
+ * is freed by GSO engine
+ */
+ if (copy_destructor) {
+ swap(gso_skb->sk, skb->sk);
+ swap(gso_skb->destructor, skb->destructor);
+ swap(gso_skb->truesize, skb->truesize);
+ }
+
delta = htonl(oldlen + (skb->tail - skb->transport_header) +
skb->data_len);
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
@@ -3383,8 +3289,11 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
for (i = 0; i < shi->nr_frags; ++i) {
const struct skb_frag_struct *f = &shi->frags[i];
- struct page *page = skb_frag_page(f);
- sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
+ unsigned int offset = f->page_offset;
+ struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
+
+ sg_set_page(&sg, page, skb_frag_size(f),
+ offset_in_page(offset));
if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
return 1;
}
@@ -3408,134 +3317,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
#endif
-/* Each Responder maintains up to two secret values concurrently for
- * efficient secret rollover. Each secret value has 4 states:
- *
- * Generating. (tcp_secret_generating != tcp_secret_primary)
- * Generates new Responder-Cookies, but not yet used for primary
- * verification. This is a short-term state, typically lasting only
- * one round trip time (RTT).
- *
- * Primary. (tcp_secret_generating == tcp_secret_primary)
- * Used both for generation and primary verification.
- *
- * Retiring. (tcp_secret_retiring != tcp_secret_secondary)
- * Used for verification, until the first failure that can be
- * verified by the newer Generating secret. At that time, this
- * cookie's state is changed to Secondary, and the Generating
- * cookie's state is changed to Primary. This is a short-term state,
- * typically lasting only one round trip time (RTT).
- *
- * Secondary. (tcp_secret_retiring == tcp_secret_secondary)
- * Used for secondary verification, after primary verification
- * failures. This state lasts no more than twice the Maximum Segment
- * Lifetime (2MSL). Then, the secret is discarded.
- */
-struct tcp_cookie_secret {
- /* The secret is divided into two parts. The digest part is the
- * equivalent of previously hashing a secret and saving the state,
- * and serves as an initialization vector (IV). The message part
- * serves as the trailing secret.
- */
- u32 secrets[COOKIE_WORKSPACE_WORDS];
- unsigned long expires;
-};
-
-#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
-#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
-#define TCP_SECRET_LIFE (HZ * 600)
-
-static struct tcp_cookie_secret tcp_secret_one;
-static struct tcp_cookie_secret tcp_secret_two;
-
-/* Essentially a circular list, without dynamic allocation. */
-static struct tcp_cookie_secret *tcp_secret_generating;
-static struct tcp_cookie_secret *tcp_secret_primary;
-static struct tcp_cookie_secret *tcp_secret_retiring;
-static struct tcp_cookie_secret *tcp_secret_secondary;
-
-static DEFINE_SPINLOCK(tcp_secret_locker);
-
-/* Select a pseudo-random word in the cookie workspace.
- */
-static inline u32 tcp_cookie_work(const u32 *ws, const int n)
-{
- return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
-}
-
-/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
- * Called in softirq context.
- * Returns: 0 for success.
- */
-int tcp_cookie_generator(u32 *bakery)
-{
- unsigned long jiffy = jiffies;
-
- if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
- spin_lock_bh(&tcp_secret_locker);
- if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
- /* refreshed by another */
- memcpy(bakery,
- &tcp_secret_generating->secrets[0],
- COOKIE_WORKSPACE_WORDS);
- } else {
- /* still needs refreshing */
- get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
-
- /* The first time, paranoia assumes that the
- * randomization function isn't as strong. But,
- * this secret initialization is delayed until
- * the last possible moment (packet arrival).
- * Although that time is observable, it is
- * unpredictably variable. Mash in the most
- * volatile clock bits available, and expire the
- * secret extra quickly.
- */
- if (unlikely(tcp_secret_primary->expires ==
- tcp_secret_secondary->expires)) {
- struct timespec tv;
-
- getnstimeofday(&tv);
- bakery[COOKIE_DIGEST_WORDS+0] ^=
- (u32)tv.tv_nsec;
-
- tcp_secret_secondary->expires = jiffy
- + TCP_SECRET_1MSL
- + (0x0f & tcp_cookie_work(bakery, 0));
- } else {
- tcp_secret_secondary->expires = jiffy
- + TCP_SECRET_LIFE
- + (0xff & tcp_cookie_work(bakery, 1));
- tcp_secret_primary->expires = jiffy
- + TCP_SECRET_2MSL
- + (0x1f & tcp_cookie_work(bakery, 2));
- }
- memcpy(&tcp_secret_secondary->secrets[0],
- bakery, COOKIE_WORKSPACE_WORDS);
-
- rcu_assign_pointer(tcp_secret_generating,
- tcp_secret_secondary);
- rcu_assign_pointer(tcp_secret_retiring,
- tcp_secret_primary);
- /*
- * Neither call_rcu() nor synchronize_rcu() needed.
- * Retiring data is not freed. It is replaced after
- * further (locked) pointer updates, and a quiet time
- * (minimum 1MSL, maximum LIFE - 2MSL).
- */
- }
- spin_unlock_bh(&tcp_secret_locker);
- } else {
- rcu_read_lock_bh();
- memcpy(bakery,
- &rcu_dereference(tcp_secret_generating)->secrets[0],
- COOKIE_WORKSPACE_WORDS);
- rcu_read_unlock_bh();
- }
- return 0;
-}
-EXPORT_SYMBOL(tcp_cookie_generator);
-
void tcp_done(struct sock *sk)
{
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3590,7 +3371,6 @@ void __init tcp_init(void)
unsigned long limit;
int max_rshare, max_wshare, cnt;
unsigned int i;
- unsigned long jiffy = jiffies;
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3666,13 +3446,5 @@ void __init tcp_init(void)
tcp_register_congestion_control(&tcp_reno);
- memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
- memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
- tcp_secret_one.expires = jiffy; /* past due */
- tcp_secret_two.expires = jiffy; /* past due */
- tcp_secret_generating = &tcp_secret_one;
- tcp_secret_primary = &tcp_secret_one;
- tcp_secret_retiring = &tcp_secret_two;
- tcp_secret_secondary = &tcp_secret_two;
tcp_tasklet_init();
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 13b9c08fc158..9c6225780bd5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -93,12 +93,11 @@ int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_thin_dupack __read_mostly;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 2;
+int sysctl_tcp_early_retrans __read_mostly = 3;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -108,10 +107,9 @@ int sysctl_tcp_early_retrans __read_mostly = 2;
#define FLAG_DATA_SACKED 0x20 /* New SACK. */
#define FLAG_ECE 0x40 /* ECE in this ACK */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
-#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
+#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
-#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
@@ -119,7 +117,6 @@ int sysctl_tcp_early_retrans __read_mostly = 2;
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
-#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@ -1160,10 +1157,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
tcp_highest_sack_seq(tp)))
state->reord = min(fack_count,
state->reord);
-
- /* SACK enhanced F-RTO (RFC4138; Appendix B) */
- if (!after(end_seq, tp->frto_highmark))
- state->flag |= FLAG_ONLY_ORIG_SACKED;
+ if (!after(end_seq, tp->high_seq))
+ state->flag |= FLAG_ORIG_SACK_ACKED;
}
if (sacked & TCPCB_LOST) {
@@ -1556,7 +1551,6 @@ static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
u32 prior_snd_una)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
const unsigned char *ptr = (skb_transport_header(ack_skb) +
TCP_SKB_CB(ack_skb)->sacked);
@@ -1729,12 +1723,6 @@ walk:
start_seq, end_seq, dup_sack);
advance_sp:
- /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
- * due to in-order walk
- */
- if (after(end_seq, tp->frto_highmark))
- state.flag &= ~FLAG_ONLY_ORIG_SACKED;
-
i++;
}
@@ -1751,8 +1739,7 @@ advance_sp:
tcp_verify_left_out(tp);
if ((state.reord < tp->fackets_out) &&
- ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
- (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
+ ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
out:
@@ -1826,197 +1813,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
tp->sacked_out = 0;
}
-static int tcp_is_sackfrto(const struct tcp_sock *tp)
-{
- return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
-}
-
-/* F-RTO can only be used if TCP has never retransmitted anything other than
- * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
- */
-bool tcp_use_frto(struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct sk_buff *skb;
-
- if (!sysctl_tcp_frto)
- return false;
-
- /* MTU probe and F-RTO won't really play nicely along currently */
- if (icsk->icsk_mtup.probe_size)
- return false;
-
- if (tcp_is_sackfrto(tp))
- return true;
-
- /* Avoid expensive walking of rexmit queue if possible */
- if (tp->retrans_out > 1)
- return false;
-
- skb = tcp_write_queue_head(sk);
- if (tcp_skb_is_last(sk, skb))
- return true;
- skb = tcp_write_queue_next(sk, skb); /* Skips head */
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
- return false;
- /* Short-circuit when first non-SACKed skb has been checked */
- if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
- break;
- }
- return true;
-}
-
-/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
- * recovery a bit and use heuristics in tcp_process_frto() to detect if
- * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
- * keep retrans_out counting accurate (with SACK F-RTO, other than head
- * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
- * bits are handled if the Loss state is really to be entered (in
- * tcp_enter_frto_loss).
- *
- * Do like tcp_enter_loss() would; when RTO expires the second time it
- * does:
- * "Reduce ssthresh if it has not yet been made inside this window."
- */
-void tcp_enter_frto(struct sock *sk)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
-
- if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
- tp->snd_una == tp->high_seq ||
- ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
- !icsk->icsk_retransmits)) {
- tp->prior_ssthresh = tcp_current_ssthresh(sk);
- /* Our state is too optimistic in ssthresh() call because cwnd
- * is not reduced until tcp_enter_frto_loss() when previous F-RTO
- * recovery has not yet completed. Pattern would be this: RTO,
- * Cumulative ACK, RTO (2xRTO for the same segment does not end
- * up here twice).
- * RFC4138 should be more specific on what to do, even though
- * RTO is quite unlikely to occur after the first Cumulative ACK
- * due to back-off and complexity of triggering events ...
- */
- if (tp->frto_counter) {
- u32 stored_cwnd;
- stored_cwnd = tp->snd_cwnd;
- tp->snd_cwnd = 2;
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
- tp->snd_cwnd = stored_cwnd;
- } else {
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
- }
- /* ... in theory, cong.control module could do "any tricks" in
- * ssthresh(), which means that ca_state, lost bits and lost_out
- * counter would have to be faked before the call occurs. We
- * consider that too expensive, unlikely and hacky, so modules
- * using these in ssthresh() must deal these incompatibility
- * issues if they receives CA_EVENT_FRTO and frto_counter != 0
- */
- tcp_ca_event(sk, CA_EVENT_FRTO);
- }
-
- tp->undo_marker = tp->snd_una;
- tp->undo_retrans = 0;
-
- skb = tcp_write_queue_head(sk);
- if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
- tp->undo_marker = 0;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tp->retrans_out -= tcp_skb_pcount(skb);
- }
- tcp_verify_left_out(tp);
-
- /* Too bad if TCP was application limited */
- tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-
- /* Earlier loss recovery underway (see RFC4138; Appendix B).
- * The last condition is necessary at least in tp->frto_counter case.
- */
- if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
- ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
- after(tp->high_seq, tp->snd_una)) {
- tp->frto_highmark = tp->high_seq;
- } else {
- tp->frto_highmark = tp->snd_nxt;
- }
- tcp_set_ca_state(sk, TCP_CA_Disorder);
- tp->high_seq = tp->snd_nxt;
- tp->frto_counter = 1;
-}
-
-/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
- * which indicates that we should follow the traditional RTO recovery,
- * i.e. mark everything lost and do go-back-N retransmission.
- */
-static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
-
- tp->lost_out = 0;
- tp->retrans_out = 0;
- if (tcp_is_reno(tp))
- tcp_reset_reno_sack(tp);
-
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
- /*
- * Count the retransmission made on RTO correctly (only when
- * waiting for the first ACK and did not get it)...
- */
- if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
- /* For some reason this R-bit might get cleared? */
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out += tcp_skb_pcount(skb);
- /* ...enter this if branch just for the first segment */
- flag |= FLAG_DATA_ACKED;
- } else {
- if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
- tp->undo_marker = 0;
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- }
-
- /* Marking forward transmissions that were made after RTO lost
- * can cause unnecessary retransmissions in some scenarios,
- * SACK blocks will mitigate that in some but not in all cases.
- * We used to not mark them but it was causing break-ups with
- * receivers that do only in-order receival.
- *
- * TODO: we could detect presence of such receiver and select
- * different behavior per flow.
- */
- if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
- TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tp->lost_out += tcp_skb_pcount(skb);
- tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
- }
- }
- tcp_verify_left_out(tp);
-
- tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
- tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
- tp->frto_counter = 0;
-
- tp->reordering = min_t(unsigned int, tp->reordering,
- sysctl_tcp_reordering);
- tcp_set_ca_state(sk, TCP_CA_Loss);
- tp->high_seq = tp->snd_nxt;
- TCP_ECN_queue_cwr(tp);
-
- tcp_clear_all_retrans_hints(tp);
-}
-
static void tcp_clear_retrans_partial(struct tcp_sock *tp)
{
tp->retrans_out = 0;
@@ -2043,10 +1839,13 @@ void tcp_enter_loss(struct sock *sk, int how)
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
+ bool new_recovery = false;
/* Reduce ssthresh if it has not yet been made inside this window. */
- if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+ !after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+ new_recovery = true;
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
@@ -2088,8 +1887,14 @@ void tcp_enter_loss(struct sock *sk, int how)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
- /* Abort F-RTO algorithm if one is in progress */
- tp->frto_counter = 0;
+
+ /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+ * loss recovery is underway except recurring timeout(s) on
+ * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+ */
+ tp->frto = sysctl_tcp_frto &&
+ (new_recovery || icsk->icsk_retransmits) &&
+ !inet_csk(sk)->icsk_mtup.probe_size;
}
/* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2148,15 +1953,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
* max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
* available, or RTO is scheduled to fire first.
*/
- if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
+ if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
+ (flag & FLAG_ECE) || !tp->srtt)
return false;
delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
return false;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
- tp->early_retrans_delayed = 1;
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
+ TCP_RTO_MAX);
return true;
}
@@ -2272,10 +2078,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
struct tcp_sock *tp = tcp_sk(sk);
__u32 packets_out;
- /* Do not perform any recovery during F-RTO algorithm */
- if (tp->frto_counter)
- return false;
-
/* Trick#1: The loss is proven. */
if (tp->lost_out)
return true;
@@ -2319,7 +2121,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
* interval if appropriate.
*/
if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
- (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
+ (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
!tcp_may_send_now(sk))
return !tcp_pause_early_retransmit(sk, flag);
@@ -2636,12 +2438,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
return failed;
}
-/* Undo during loss recovery after partial ACK. */
-static bool tcp_try_undo_loss(struct sock *sk)
+/* Undo during loss recovery after partial ACK or using F-RTO. */
+static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_may_undo(tp)) {
+ if (frto_undo || tcp_may_undo(tp)) {
struct sk_buff *skb;
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
@@ -2655,9 +2457,12 @@ static bool tcp_try_undo_loss(struct sock *sk)
tp->lost_out = 0;
tcp_undo_cwr(sk, true);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
+ if (frto_undo)
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
tp->undo_marker = 0;
- if (tcp_is_sack(tp))
+ if (frto_undo || tcp_is_sack(tp))
tcp_set_ca_state(sk, TCP_CA_Open);
return true;
}
@@ -2679,6 +2484,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
struct tcp_sock *tp = tcp_sk(sk);
tp->high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = 0;
tp->snd_cwnd_cnt = 0;
tp->prior_cwnd = tp->snd_cwnd;
tp->prr_delivered = 0;
@@ -2756,7 +2562,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
tcp_verify_left_out(tp);
- if (!tp->frto_counter && !tcp_any_retrans_done(sk))
+ if (!tcp_any_retrans_done(sk))
tp->retrans_stamp = 0;
if (flag & FLAG_ECE)
@@ -2873,6 +2679,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
+/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
+ * recovered or spurious. Otherwise retransmits more on partial ACKs.
+ */
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ bool recovered = !before(tp->snd_una, tp->high_seq);
+
+ if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+ if (flag & FLAG_ORIG_SACK_ACKED) {
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ tcp_try_undo_loss(sk, true);
+ return;
+ }
+ if (after(tp->snd_nxt, tp->high_seq) &&
+ (flag & FLAG_DATA_SACKED || is_dupack)) {
+ tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+ } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
+ tp->high_seq = tp->snd_nxt;
+ __tcp_push_pending_frames(sk, tcp_current_mss(sk),
+ TCP_NAGLE_OFF);
+ if (after(tp->snd_nxt, tp->high_seq))
+ return; /* Step 2.b */
+ tp->frto = 0;
+ }
+ }
+
+ if (recovered) {
+ /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
+ icsk->icsk_retransmits = 0;
+ tcp_try_undo_recovery(sk);
+ return;
+ }
+ if (flag & FLAG_DATA_ACKED)
+ icsk->icsk_retransmits = 0;
+ if (tcp_is_reno(tp)) {
+ /* A Reno DUPACK means new data in F-RTO step 2.b above are
+ * delivered. Lower inflight to clock out (re)transmissions.
+ */
+ if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
+ tcp_add_reno_sack(sk);
+ else if (flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
+ }
+ if (tcp_try_undo_loss(sk, false))
+ return;
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2885,8 +2743,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int prior_sacked, bool is_dupack,
- int flag)
+ int prior_sacked, int prior_packets,
+ bool is_dupack, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2919,12 +2777,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (icsk->icsk_ca_state) {
- case TCP_CA_Loss:
- icsk->icsk_retransmits = 0;
- if (tcp_try_undo_recovery(sk))
- return;
- break;
-
case TCP_CA_CWR:
/* CWR is to be held something *above* high_seq
* is ACKed for CWR bit to reach receiver. */
@@ -2952,21 +2804,14 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_add_reno_sack(sk);
} else
do_lost = tcp_try_undo_partial(sk, pkts_acked);
- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
+ newly_acked_sacked = prior_packets - tp->packets_out +
+ tp->sacked_out - prior_sacked;
break;
case TCP_CA_Loss:
- if (flag & FLAG_DATA_ACKED)
- icsk->icsk_retransmits = 0;
- if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
- tcp_reset_reno_sack(tp);
- if (!tcp_try_undo_loss(sk)) {
- tcp_moderate_cwnd(tp);
- tcp_xmit_retransmit_queue(sk);
- return;
- }
+ tcp_process_loss(sk, flag, is_dupack);
if (icsk->icsk_ca_state != TCP_CA_Open)
return;
- /* Loss is undone; fall through to processing in Open state. */
+ /* Fall through to processing in Open state. */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2974,7 +2819,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (is_dupack)
tcp_add_reno_sack(sk);
}
- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
+ newly_acked_sacked = prior_packets - tp->packets_out +
+ tp->sacked_out - prior_sacked;
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
@@ -3079,6 +2925,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
*/
void tcp_rearm_rto(struct sock *sk)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* If the retrans timer is currently being used by Fast Open
@@ -3092,12 +2939,13 @@ void tcp_rearm_rto(struct sock *sk)
} else {
u32 rto = inet_csk(sk)->icsk_rto;
/* Offset the time elapsed after installing regular RTO */
- if (tp->early_retrans_delayed) {
+ if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
struct sk_buff *skb = tcp_write_queue_head(sk);
const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
/* delta may not be positive if the socket is locked
- * when the delayed ER timer fires and is rescheduled.
+ * when the retrans timer fires and is rescheduled.
*/
if (delta > 0)
rto = delta;
@@ -3105,7 +2953,6 @@ void tcp_rearm_rto(struct sock *sk)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
}
- tp->early_retrans_delayed = 0;
}
/* This function is called when the delayed ER timer fires. TCP enters
@@ -3193,8 +3040,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
flag |= FLAG_RETRANS_DATA_ACKED;
ca_seq_rtt = -1;
seq_rtt = -1;
- if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
- flag |= FLAG_NONHEAD_RETRANS_ACKED;
} else {
ca_seq_rtt = now - scb->when;
last_ackt = skb->tstamp;
@@ -3203,6 +3048,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
}
if (!(sacked & TCPCB_SACKED_ACKED))
reord = min(pkts_acked, reord);
+ if (!after(scb->end_seq, tp->high_seq))
+ flag |= FLAG_ORIG_SACK_ACKED;
}
if (sacked & TCPCB_SACKED_ACKED)
@@ -3403,150 +3250,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
return flag;
}
-/* A very conservative spurious RTO response algorithm: reduce cwnd and
- * continue in congestion avoidance.
- */
-static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
-{
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
- tp->snd_cwnd_cnt = 0;
- TCP_ECN_queue_cwr(tp);
- tcp_moderate_cwnd(tp);
-}
-
-/* A conservative spurious RTO response algorithm: reduce cwnd using
- * PRR and continue in congestion avoidance.
- */
-static void tcp_cwr_spur_to_response(struct sock *sk)
-{
- tcp_enter_cwr(sk, 0);
-}
-
-static void tcp_undo_spur_to_response(struct sock *sk, int flag)
-{
- if (flag & FLAG_ECE)
- tcp_cwr_spur_to_response(sk);
- else
- tcp_undo_cwr(sk, true);
-}
-
-/* F-RTO spurious RTO detection algorithm (RFC4138)
- *
- * F-RTO affects during two new ACKs following RTO (well, almost, see inline
- * comments). State (ACK number) is kept in frto_counter. When ACK advances
- * window (but not to or beyond highest sequence sent before RTO):
- * On First ACK, send two new segments out.
- * On Second ACK, RTO was likely spurious. Do spurious response (response
- * algorithm is not part of the F-RTO detection algorithm
- * given in RFC4138 but can be selected separately).
- * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
- * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
- * of Nagle, this is done using frto_counter states 2 and 3, when a new data
- * segment of any size sent during F-RTO, state 2 is upgraded to 3.
- *
- * Rationale: if the RTO was spurious, new ACKs should arrive from the
- * original window even after we transmit two new data segments.
- *
- * SACK version:
- * on first step, wait until first cumulative ACK arrives, then move to
- * the second step. In second step, the next ACK decides.
- *
- * F-RTO is implemented (mainly) in four functions:
- * - tcp_use_frto() is used to determine if TCP is can use F-RTO
- * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
- * called when tcp_use_frto() showed green light
- * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
- * - tcp_enter_frto_loss() is called if there is not enough evidence
- * to prove that the RTO is indeed spurious. It transfers the control
- * from F-RTO to the conventional RTO recovery
- */
-static bool tcp_process_frto(struct sock *sk, int flag)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- tcp_verify_left_out(tp);
-
- /* Duplicate the behavior from Loss state (fastretrans_alert) */
- if (flag & FLAG_DATA_ACKED)
- inet_csk(sk)->icsk_retransmits = 0;
-
- if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
- ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
- tp->undo_marker = 0;
-
- if (!before(tp->snd_una, tp->frto_highmark)) {
- tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
- return true;
- }
-
- if (!tcp_is_sackfrto(tp)) {
- /* RFC4138 shortcoming in step 2; should also have case c):
- * ACK isn't duplicate nor advances window, e.g., opposite dir
- * data, winupdate
- */
- if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
- return true;
-
- if (!(flag & FLAG_DATA_ACKED)) {
- tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
- flag);
- return true;
- }
- } else {
- if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
- if (!tcp_packets_in_flight(tp)) {
- tcp_enter_frto_loss(sk, 2, flag);
- return true;
- }
-
- /* Prevent sending of new data. */
- tp->snd_cwnd = min(tp->snd_cwnd,
- tcp_packets_in_flight(tp));
- return true;
- }
-
- if ((tp->frto_counter >= 2) &&
- (!(flag & FLAG_FORWARD_PROGRESS) ||
- ((flag & FLAG_DATA_SACKED) &&
- !(flag & FLAG_ONLY_ORIG_SACKED)))) {
- /* RFC4138 shortcoming (see comment above) */
- if (!(flag & FLAG_FORWARD_PROGRESS) &&
- (flag & FLAG_NOT_DUP))
- return true;
-
- tcp_enter_frto_loss(sk, 3, flag);
- return true;
- }
- }
-
- if (tp->frto_counter == 1) {
- /* tcp_may_send_now needs to see updated state */
- tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
- tp->frto_counter = 2;
-
- if (!tcp_may_send_now(sk))
- tcp_enter_frto_loss(sk, 2, flag);
-
- return true;
- } else {
- switch (sysctl_tcp_frto_response) {
- case 2:
- tcp_undo_spur_to_response(sk, flag);
- break;
- case 1:
- tcp_conservative_spur_to_response(tp);
- break;
- default:
- tcp_cwr_spur_to_response(sk);
- break;
- }
- tp->frto_counter = 0;
- tp->undo_marker = 0;
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
- }
- return false;
-}
-
/* RFC 5961 7 [ACK Throttling] */
static void tcp_send_challenge_ack(struct sock *sk)
{
@@ -3586,6 +3289,38 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
}
}
+/* This routine deals with acks during a TLP episode.
+ * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+ */
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
+ !(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DATA_SACKED));
+
+ /* Mark the end of TLP episode on receiving TLP dupack or when
+ * ack is after tlp_high_seq.
+ */
+ if (is_tlp_dupack) {
+ tp->tlp_high_seq = 0;
+ return;
+ }
+
+ if (after(ack, tp->tlp_high_seq)) {
+ tp->tlp_high_seq = 0;
+ /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
+ if (!(flag & FLAG_DSACKING_ACK)) {
+ tcp_init_cwnd_reduction(sk, true);
+ tcp_set_ca_state(sk, TCP_CA_CWR);
+ tcp_end_cwnd_reduction(sk);
+ tcp_set_ca_state(sk, TCP_CA_Open);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPLOSSPROBERECOVERY);
+ }
+ }
+}
+
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
@@ -3597,10 +3332,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
bool is_dupack = false;
u32 prior_in_flight;
u32 prior_fackets;
- int prior_packets;
+ int prior_packets = tp->packets_out;
int prior_sacked = tp->sacked_out;
int pkts_acked = 0;
- bool frto_cwnd = false;
+ int previous_packets_out = 0;
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3620,7 +3355,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt))
goto invalid_ack;
- if (tp->early_retrans_delayed)
+ if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
if (after(ack, prior_snd_una))
@@ -3670,52 +3406,54 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
tp->rcv_tstamp = tcp_time_stamp;
- prior_packets = tp->packets_out;
if (!prior_packets)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
+ previous_packets_out = tp->packets_out;
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
- pkts_acked = prior_packets - tp->packets_out;
-
- if (tp->frto_counter)
- frto_cwnd = tcp_process_frto(sk, flag);
- /* Guarantee sacktag reordering detection against wrap-arounds */
- if (before(tp->frto_highmark, tp->snd_una))
- tp->frto_highmark = 0;
+ pkts_acked = previous_packets_out - tp->packets_out;
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
- if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
- tcp_may_raise_cwnd(sk, flag))
+ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
- is_dupack, flag);
+ prior_packets, is_dupack, flag);
} else {
- if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
+ if (flag & FLAG_DATA_ACKED)
tcp_cong_avoid(sk, ack, prior_in_flight);
}
+ if (tp->tlp_high_seq)
+ tcp_process_tlp_ack(sk, ack, flag);
+
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
struct dst_entry *dst = __sk_dst_get(sk);
if (dst)
dst_confirm(dst);
}
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS)
+ tcp_schedule_loss_probe(sk);
return 1;
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
- is_dupack, flag);
+ prior_packets, is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
if (tcp_send_head(sk))
tcp_ack_probe(sk);
+
+ if (tp->tlp_high_seq)
+ tcp_process_tlp_ack(sk, ack, flag);
return 1;
invalid_ack:
@@ -3729,7 +3467,7 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
- is_dupack, flag);
+ prior_packets, is_dupack, flag);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3740,8 +3478,8 @@ old_ack:
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
-void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
- const u8 **hvpp, int estab,
+void tcp_parse_options(const struct sk_buff *skb,
+ struct tcp_options_received *opt_rx, int estab,
struct tcp_fastopen_cookie *foc)
{
const unsigned char *ptr;
@@ -3825,31 +3563,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
*/
break;
#endif
- case TCPOPT_COOKIE:
- /* This option is variable length.
- */
- switch (opsize) {
- case TCPOLEN_COOKIE_BASE:
- /* not yet implemented */
- break;
- case TCPOLEN_COOKIE_PAIR:
- /* not yet implemented */
- break;
- case TCPOLEN_COOKIE_MIN+0:
- case TCPOLEN_COOKIE_MIN+2:
- case TCPOLEN_COOKIE_MIN+4:
- case TCPOLEN_COOKIE_MIN+6:
- case TCPOLEN_COOKIE_MAX:
- /* 16-bit multiple */
- opt_rx->cookie_plus = opsize;
- *hvpp = ptr;
- break;
- default:
- /* ignore option */
- break;
- }
- break;
-
case TCPOPT_EXP:
/* Fast Open option shares code 254 using a
* 16 bits magic number. It's valid only in
@@ -3895,8 +3608,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
* If it is wrong it falls back on tcp_parse_options().
*/
static bool tcp_fast_parse_options(const struct sk_buff *skb,
- const struct tcphdr *th,
- struct tcp_sock *tp, const u8 **hvpp)
+ const struct tcphdr *th, struct tcp_sock *tp)
{
/* In the spirit of fast parsing, compare doff directly to constant
* values. Because equality is used, short doff can be ignored here.
@@ -3910,7 +3622,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
return true;
}
- tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
+ tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
if (tp->rx_opt.saw_tstamp)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@@ -5270,12 +4982,10 @@ out:
static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, int syn_inerr)
{
- const u8 *hash_location;
struct tcp_sock *tp = tcp_sk(sk);
/* RFC1323: H1. Apply PAWS check first. */
- if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
- tp->rx_opt.saw_tstamp &&
+ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5566,6 +5276,7 @@ step5:
return 0;
csum_error:
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
discard:
@@ -5624,12 +5335,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
if (mss == tp->rx_opt.user_mss) {
struct tcp_options_received opt;
- const u8 *hash_location;
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
tcp_clear_options(&opt);
opt.user_mss = opt.mss_clamp = 0;
- tcp_parse_options(synack, &opt, &hash_location, 0, NULL);
+ tcp_parse_options(synack, &opt, 0, NULL);
mss = opt.mss_clamp;
}
@@ -5660,14 +5370,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, unsigned int len)
{
- const u8 *hash_location;
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_cookie_values *cvp = tp->cookie_values;
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
- tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc);
+ tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@@ -5764,30 +5472,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
- if (cvp != NULL &&
- cvp->cookie_pair_size > 0 &&
- tp->rx_opt.cookie_plus > 0) {
- int cookie_size = tp->rx_opt.cookie_plus
- - TCPOLEN_COOKIE_BASE;
- int cookie_pair_size = cookie_size
- + cvp->cookie_desired;
-
- /* A cookie extension option was sent and returned.
- * Note that each incoming SYNACK replaces the
- * Responder cookie. The initial exchange is most
- * fragile, as protection against spoofing relies
- * entirely upon the sequence and timestamp (above).
- * This replacement strategy allows the correct pair to
- * pass through, while any others will be filtered via
- * Responder verification later.
- */
- if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
- memcpy(&cvp->cookie_pair[cvp->cookie_desired],
- hash_location, cookie_size);
- cvp->cookie_pair_size = cookie_pair_size;
- }
- }
-
smp_mb();
tcp_finish_connect(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d09203c63264..7999fc55c83b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct request_values *rvp,
u16 queue_mapping,
bool nocache)
{
@@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+ skb = tcp_make_synack(sk, dst, req, NULL);
if (skb) {
__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
@@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
return err;
}
-static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
- struct request_values *rvp)
+static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
- int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
+ int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
if (!res)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1005,7 +1003,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+ key = tcp_md5_do_lookup(sk, addr, family);
if (key) {
/* Pre-existing entry - just update that one. */
memcpy(key->key, newkey, newkeylen);
@@ -1050,7 +1048,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
struct tcp_md5sig_key *key;
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+ key = tcp_md5_do_lookup(sk, addr, family);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
static int tcp_v4_conn_req_fastopen(struct sock *sk,
struct sk_buff *skb,
struct sk_buff *skb_synack,
- struct request_sock *req,
- struct request_values *rvp)
+ struct request_sock *req)
{
struct tcp_sock *tp = tcp_sk(sk);
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
@@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_extend_values tmp_ext;
struct tcp_options_received tmp_opt;
- const u8 *hash_location;
struct request_sock *req;
struct inet_request_sock *ireq;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
- want_cookie ? NULL : &foc);
-
- if (tmp_opt.cookie_plus > 0 &&
- tmp_opt.saw_tstamp &&
- !tp->rx_opt.cookie_out_never &&
- (sysctl_tcp_cookie_size > 0 ||
- (tp->cookie_values != NULL &&
- tp->cookie_values->cookie_desired > 0))) {
- u8 *c;
- u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
- int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
-
- if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
- goto drop_and_release;
-
- /* Secret recipe starts with IP addresses */
- *mess++ ^= (__force u32)daddr;
- *mess++ ^= (__force u32)saddr;
-
- /* plus variable length Initiator Cookie */
- c = (u8 *)mess;
- while (l-- > 0)
- *c++ ^= *hash_location++;
-
- want_cookie = false; /* not our kind of cookie */
- tmp_ext.cookie_out_never = 0; /* false */
- tmp_ext.cookie_plus = tmp_opt.cookie_plus;
- } else if (!tp->rx_opt.cookie_in_always) {
- /* redundant indications, but ensure initialization. */
- tmp_ext.cookie_out_never = 1; /* true */
- tmp_ext.cookie_plus = 0;
- } else {
- goto drop_and_release;
- }
- tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+ tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* of tcp_v4_send_synack()->tcp_select_initial_window().
*/
skb_synack = tcp_make_synack(sk, dst, req,
- (struct request_values *)&tmp_ext,
fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
if (skb_synack) {
@@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (fastopen_cookie_present(&foc) && foc.len != 0)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
- (struct request_values *)&tmp_ext))
+ } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
goto drop_and_free;
return 0;
@@ -1908,6 +1866,7 @@ discard:
return 0;
csum_err:
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
@@ -1950,6 +1909,51 @@ void tcp_v4_early_demux(struct sk_buff *skb)
}
}
+/* Packet is added to VJ-style prequeue for processing in process
+ * context, if a reader task is waiting. Apparently, this exciting
+ * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
+ * failed somewhere. Latency? Burstiness? Well, at least now we will
+ * see, why it failed. 8)8) --ANK
+ *
+ */
+bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (sysctl_tcp_low_latency || !tp->ucopy.task)
+ return false;
+
+ if (skb->len <= tcp_hdrlen(skb) &&
+ skb_queue_len(&tp->ucopy.prequeue) == 0)
+ return false;
+
+ skb_dst_force(skb);
+ __skb_queue_tail(&tp->ucopy.prequeue, skb);
+ tp->ucopy.memory += skb->truesize;
+ if (tp->ucopy.memory > sk->sk_rcvbuf) {
+ struct sk_buff *skb1;
+
+ BUG_ON(sock_owned_by_user(sk));
+
+ while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
+ sk_backlog_rcv(sk, skb1);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPPREQUEUEDROPPED);
+ }
+
+ tp->ucopy.memory = 0;
+ } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
+ wake_up_interruptible_sync_poll(sk_sleep(sk),
+ POLLIN | POLLRDNORM | POLLRDBAND);
+ if (!inet_csk_ack_scheduled(sk))
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ (3 * tcp_rto_min(sk)) / 4,
+ TCP_RTO_MAX);
+ }
+ return true;
+}
+EXPORT_SYMBOL(tcp_prequeue);
+
/*
* From tcp_input.c
*/
@@ -1983,7 +1987,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
* provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
- goto bad_packet;
+ goto csum_error;
th = tcp_hdr(skb);
iph = ip_hdr(skb);
@@ -2049,6 +2053,8 @@ no_tcp_socket:
goto discard_it;
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+csum_error:
+ TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -2070,10 +2076,13 @@ do_time_wait:
goto discard_it;
}
- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ if (skb->len < (th->doff << 2)) {
inet_twsk_put(inet_twsk(sk));
- goto discard_it;
+ goto bad_packet;
+ }
+ if (tcp_checksum_complete(skb)) {
+ inet_twsk_put(inet_twsk(sk));
+ goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
@@ -2197,12 +2206,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
- /* TCP Cookie Transactions */
- if (tp->cookie_values != NULL) {
- kref_put(&tp->cookie_values->kref,
- tcp_cookie_values_release);
- tp->cookie_values = NULL;
- }
BUG_ON(tp->fastopen_rsk != NULL);
/* If socket is aborted during connect operation */
@@ -2580,7 +2583,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
int tcp_seq_open(struct inode *inode, struct file *file)
{
- struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
+ struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
struct tcp_iter_state *s;
int err;
@@ -2659,7 +2662,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
__u16 srcp = ntohs(inet->inet_sport);
int rx_queue;
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index b6f3583ddfe8..da14436c1735 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
struct cg_proto *cg_proto;
struct tcp_memcontrol *tcp;
- u64 val;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
@@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
tcp = tcp_from_cgproto(cg_proto);
percpu_counter_destroy(&tcp->tcp_sockets_allocated);
-
- val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f696d7c2e9fa..f6a005c485a9 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -96,7 +96,8 @@ struct tcpm_hash_bucket {
static DEFINE_SPINLOCK(tcp_metrics_lock);
-static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
+ bool fastopen_clear)
{
u32 val;
@@ -122,9 +123,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
tm->tcpm_ts = 0;
tm->tcpm_ts_stamp = 0;
- tm->tcpm_fastopen.mss = 0;
- tm->tcpm_fastopen.syn_loss = 0;
- tm->tcpm_fastopen.cookie.len = 0;
+ if (fastopen_clear) {
+ tm->tcpm_fastopen.mss = 0;
+ tm->tcpm_fastopen.syn_loss = 0;
+ tm->tcpm_fastopen.cookie.len = 0;
+ }
}
static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
@@ -154,7 +157,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
}
tm->tcpm_addr = *addr;
- tcpm_suck_dst(tm, dst);
+ tcpm_suck_dst(tm, dst, true);
if (likely(!reclaim)) {
tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
@@ -171,7 +174,7 @@ out_unlock:
static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
- tcpm_suck_dst(tm, dst);
+ tcpm_suck_dst(tm, dst, false);
}
#define TCP_METRICS_RECLAIM_DEPTH 5
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b83a49cc3816..0f0178827259 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th)
{
struct tcp_options_received tmp_opt;
- const u8 *hash_location;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+ tcp_parse_options(skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
@@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct tcp_sock *newtp = tcp_sk(newsk);
- struct tcp_sock *oldtp = tcp_sk(sk);
- struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
-
- /* TCP Cookie Transactions require space for the cookie pair,
- * as it differs for each connection. There is no need to
- * copy any s_data_payload stored at the original socket.
- * Failure will prevent resuming the connection.
- *
- * Presumed copied, in order of appearance:
- * cookie_in_always, cookie_out_never
- */
- if (oldcvp != NULL) {
- struct tcp_cookie_values *newcvp =
- kzalloc(sizeof(*newtp->cookie_values),
- GFP_ATOMIC);
-
- if (newcvp != NULL) {
- kref_init(&newcvp->kref);
- newcvp->cookie_desired =
- oldcvp->cookie_desired;
- newtp->cookie_values = newcvp;
- } else {
- /* Not Yet Implemented */
- newtp->cookie_values = NULL;
- }
- }
/* Now setup tcp_sock */
newtp->pred_flags = 0;
@@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rcv_nxt = treq->rcv_isn + 1;
newtp->snd_sml = newtp->snd_una =
- newtp->snd_nxt = newtp->snd_up =
- treq->snt_isn + 1 + tcp_s_data_size(oldtp);
+ newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
tcp_prequeue_init(newtp);
INIT_LIST_HEAD(&newtp->tsq_node);
@@ -440,6 +412,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->fackets_out = 0;
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tcp_enable_early_retrans(newtp);
+ newtp->tlp_high_seq = 0;
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -449,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
- newtp->frto_counter = 0;
- newtp->frto_highmark = 0;
-
if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
!try_module_get(newicsk->icsk_ca_ops->owner))
newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
@@ -459,8 +429,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
- newtp->write_seq = newtp->pushed_seq =
- treq->snt_isn + 1 + tcp_s_data_size(oldtp);
+ newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
newtp->rx_opt.saw_tstamp = 0;
@@ -537,7 +506,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
bool fastopen)
{
struct tcp_options_received tmp_opt;
- const u8 *hash_location;
struct sock *child;
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
@@ -547,7 +515,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+ tcp_parse_options(skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
@@ -583,8 +551,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*
* Note that even if there is new data in the SYN packet
* they will be thrown away too.
+ *
+ * Reset timer after retransmitting SYNACK, similar to
+ * the idea of fast retransmit in recovery.
*/
- inet_rtx_syn_ack(sk, req);
+ if (!inet_rtx_syn_ack(sk, req))
+ req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout,
+ TCP_RTO_MAX) + jiffies;
return NULL;
}
@@ -647,7 +620,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*/
if ((flg & TCP_FLAG_ACK) && !fastopen &&
(TCP_SKB_CB(skb)->ack_seq !=
- tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
+ tcp_rsk(req)->snt_isn + 1))
return sk;
/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 509912a5ff98..ec335fabd5cc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,28 +65,24 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
-int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
-EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
-
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- /* Don't override Nagle indefinitely with F-RTO */
- if (tp->frto_counter == 2)
- tp->frto_counter = 3;
-
tp->packets_out += tcp_skb_pcount(skb);
- if (!prior_packets || tp->early_retrans_delayed)
+ if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
tcp_rearm_rto(sk);
+ }
}
/* SND.NXT, if window was not shrunk.
@@ -384,7 +380,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_TS (1 << 1)
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
-#define OPTION_COOKIE_EXTENSION (1 << 4)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
struct tcp_out_options {
@@ -398,36 +393,6 @@ struct tcp_out_options {
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
};
-/* The sysctl int routines are generic, so check consistency here.
- */
-static u8 tcp_cookie_size_check(u8 desired)
-{
- int cookie_size;
-
- if (desired > 0)
- /* previously specified */
- return desired;
-
- cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
- if (cookie_size <= 0)
- /* no default specified */
- return 0;
-
- if (cookie_size <= TCP_COOKIE_MIN)
- /* value too small, specify minimum */
- return TCP_COOKIE_MIN;
-
- if (cookie_size >= TCP_COOKIE_MAX)
- /* value too large, specify maximum */
- return TCP_COOKIE_MAX;
-
- if (cookie_size & 1)
- /* 8-bit multiple, illegal, fix it */
- cookie_size++;
-
- return (u8)cookie_size;
-}
-
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -446,27 +411,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
{
u16 options = opts->options; /* mungable copy */
- /* Having both authentication and cookies for security is redundant,
- * and there's certainly not enough room. Instead, the cookie-less
- * extension variant is proposed.
- *
- * Consider the pessimal case with authentication. The options
- * could look like:
- * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
- */
if (unlikely(OPTION_MD5 & options)) {
- if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
- *ptr++ = htonl((TCPOPT_COOKIE << 24) |
- (TCPOLEN_COOKIE_BASE << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- } else {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- }
- options &= ~OPTION_COOKIE_EXTENSION;
+ *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
/* overload cookie hash location */
opts->hash_location = (__u8 *)ptr;
ptr += 4;
@@ -495,44 +442,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
*ptr++ = htonl(opts->tsecr);
}
- /* Specification requires after timestamp, so do it now.
- *
- * Consider the pessimal case without authentication. The options
- * could look like:
- * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
- */
- if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
- __u8 *cookie_copy = opts->hash_location;
- u8 cookie_size = opts->hash_size;
-
- /* 8-bit multiple handled in tcp_cookie_size_check() above,
- * and elsewhere.
- */
- if (0x2 & cookie_size) {
- __u8 *p = (__u8 *)ptr;
-
- /* 16-bit multiple */
- *p++ = TCPOPT_COOKIE;
- *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
- *p++ = *cookie_copy++;
- *p++ = *cookie_copy++;
- ptr++;
- cookie_size -= 2;
- } else {
- /* 32-bit multiple */
- *ptr++ = htonl(((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_COOKIE << 8) |
- TCPOLEN_COOKIE_BASE) +
- cookie_size);
- }
-
- if (cookie_size > 0) {
- memcpy(ptr, cookie_copy, cookie_size);
- ptr += (cookie_size / 4);
- }
- }
-
if (unlikely(OPTION_SACK_ADVERTISE & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
@@ -591,11 +500,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_md5sig_key **md5)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_cookie_values *cvp = tp->cookie_values;
unsigned int remaining = MAX_TCP_OPTION_SPACE;
- u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
- tcp_cookie_size_check(cvp->cookie_desired) :
- 0;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
#ifdef CONFIG_TCP_MD5SIG
@@ -647,52 +552,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
tp->syn_fastopen = 1;
}
}
- /* Note that timestamps are required by the specification.
- *
- * Odd numbers of bytes are prohibited by the specification, ensuring
- * that the cookie is 16-bit aligned, and the resulting cookie pair is
- * 32-bit aligned.
- */
- if (*md5 == NULL &&
- (OPTION_TS & opts->options) &&
- cookie_size > 0) {
- int need = TCPOLEN_COOKIE_BASE + cookie_size;
-
- if (0x2 & need) {
- /* 32-bit multiple */
- need += 2; /* NOPs */
-
- if (need > remaining) {
- /* try shrinking cookie to fit */
- cookie_size -= 2;
- need -= 4;
- }
- }
- while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
- cookie_size -= 4;
- need -= 4;
- }
- if (TCP_COOKIE_MIN <= cookie_size) {
- opts->options |= OPTION_COOKIE_EXTENSION;
- opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
- opts->hash_size = cookie_size;
-
- /* Remember for future incarnations. */
- cvp->cookie_desired = cookie_size;
-
- if (cvp->cookie_desired != cvp->cookie_pair_size) {
- /* Currently use random bytes as a nonce,
- * assuming these are completely unpredictable
- * by hostile users of the same system.
- */
- get_random_bytes(&cvp->cookie_pair[0],
- cookie_size);
- cvp->cookie_pair_size = cookie_size;
- }
- remaining -= need;
- }
- }
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -702,14 +562,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5,
- struct tcp_extend_values *xvp,
struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
- u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
- xvp->cookie_plus :
- 0;
#ifdef CONFIG_TCP_MD5SIG
*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -757,28 +613,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
remaining -= need;
}
}
- /* Similar rationale to tcp_syn_options() applies here, too.
- * If the <SYN> options fit, the same options should fit now!
- */
- if (*md5 == NULL &&
- ireq->tstamp_ok &&
- cookie_plus > TCPOLEN_COOKIE_BASE) {
- int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
-
- if (0x2 & need) {
- /* 32-bit multiple */
- need += 2; /* NOPs */
- }
- if (need <= remaining) {
- opts->options |= OPTION_COOKIE_EXTENSION;
- opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
- remaining -= need;
- } else {
- /* There's no error return, so flag it. */
- xvp->cookie_out_never = 1; /* true */
- opts->hash_size = 0;
- }
- }
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -953,7 +788,7 @@ void __init tcp_tasklet_init(void)
* We cant xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
-static void tcp_wfree(struct sk_buff *skb)
+void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1012,6 +847,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
__net_timestamp(skb);
if (likely(clone_it)) {
+ const struct sk_buff *fclone = skb + 1;
+
+ if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+ fclone->fclone == SKB_FCLONE_CLONE))
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
else
@@ -1032,11 +874,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
&md5);
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- if (tcp_packets_in_flight(tp) == 0) {
+ if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- skb->ooo_okay = 1;
- } else
- skb->ooo_okay = 0;
+
+ /* if no packet is in qdisc/device queue, then allow XPS to select
+ * another queue.
+ */
+ skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -1632,11 +1476,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
if (nonagle & TCP_NAGLE_PUSH)
return true;
- /* Don't use the nagle rule for urgent data (or for the final FIN).
- * Nagle can be ignored during F-RTO too (see RFC4138).
- */
- if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
- (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
+ /* Don't use the nagle rule for urgent data (or for the final FIN). */
+ if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
return true;
if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1961,6 +1802,9 @@ static int tcp_mtu_probe(struct sock *sk)
* snd_up-64k-mss .. snd_up cannot be large. However, taking into
* account rare use of URG, this is not a big flaw.
*
+ * Send at most one packet when push_one > 0. Temporarily ignore
+ * cwnd limit to force at most one packet out when push_one == 2.
+
* Returns true, if no segments are in flight and we have queued segments,
* but cannot send anything now because of SWS or another problem.
*/
@@ -1996,8 +1840,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
goto repair; /* Skip network transmission */
cwnd_quota = tcp_cwnd_test(tp, skb);
- if (!cwnd_quota)
- break;
+ if (!cwnd_quota) {
+ if (push_one == 2)
+ /* Force out a loss probe pkt. */
+ cwnd_quota = 1;
+ else
+ break;
+ }
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
break;
@@ -2051,10 +1900,129 @@ repair:
if (likely(sent_pkts)) {
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += sent_pkts;
+
+ /* Send one loss probe per tail loss episode. */
+ if (push_one != 2)
+ tcp_schedule_loss_probe(sk);
tcp_cwnd_validate(sk);
return false;
}
- return !tp->packets_out && tcp_send_head(sk);
+ return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
+}
+
+bool tcp_schedule_loss_probe(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 timeout, tlp_time_stamp, rto_time_stamp;
+ u32 rtt = tp->srtt >> 3;
+
+ if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
+ return false;
+ /* No consecutive loss probes. */
+ if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
+ tcp_rearm_rto(sk);
+ return false;
+ }
+ /* Don't do any loss probe on a Fast Open connection before 3WHS
+ * finishes.
+ */
+ if (sk->sk_state == TCP_SYN_RECV)
+ return false;
+
+ /* TLP is only scheduled when next timer event is RTO. */
+ if (icsk->icsk_pending != ICSK_TIME_RETRANS)
+ return false;
+
+ /* Schedule a loss probe in 2*RTT for SACK capable connections
+ * in Open state, that are either limited by cwnd or application.
+ */
+ if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+ !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+ return false;
+
+ if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
+ tcp_send_head(sk))
+ return false;
+
+ /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+ * for delayed ack when there's one outstanding packet.
+ */
+ timeout = rtt << 1;
+ if (tp->packets_out == 1)
+ timeout = max_t(u32, timeout,
+ (rtt + (rtt >> 1) + TCP_DELACK_MAX));
+ timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+
+ /* If RTO is shorter, just schedule TLP in its place. */
+ tlp_time_stamp = tcp_time_stamp + timeout;
+ rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
+ if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
+ s32 delta = rto_time_stamp - tcp_time_stamp;
+ if (delta > 0)
+ timeout = delta;
+ }
+
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
+ TCP_RTO_MAX);
+ return true;
+}
+
+/* When probe timeout (PTO) fires, send a new segment if one exists, else
+ * retransmit the last segment.
+ */
+void tcp_send_loss_probe(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+ int pcount;
+ int mss = tcp_current_mss(sk);
+ int err = -1;
+
+ if (tcp_send_head(sk) != NULL) {
+ err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+ goto rearm_timer;
+ }
+
+ /* At most one outstanding TLP retransmission. */
+ if (tp->tlp_high_seq)
+ goto rearm_timer;
+
+ /* Retransmit last segment. */
+ skb = tcp_write_queue_tail(sk);
+ if (WARN_ON(!skb))
+ goto rearm_timer;
+
+ pcount = tcp_skb_pcount(skb);
+ if (WARN_ON(!pcount))
+ goto rearm_timer;
+
+ if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
+ if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+ goto rearm_timer;
+ skb = tcp_write_queue_tail(sk);
+ }
+
+ if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
+ goto rearm_timer;
+
+ /* Probe with zero data doesn't trigger fast recovery. */
+ if (skb->len > 0)
+ err = __tcp_retransmit_skb(sk, skb);
+
+ /* Record snd_nxt for loss detection. */
+ if (likely(!err))
+ tp->tlp_high_seq = tp->snd_nxt;
+
+rearm_timer:
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ inet_csk(sk)->icsk_rto,
+ TCP_RTO_MAX);
+
+ if (likely(!err))
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPLOSSPROBES);
+ return;
}
/* Push out any pending frames which were held back due to
@@ -2679,32 +2647,24 @@ int tcp_send_synack(struct sock *sk)
* sk: listener socket
* dst: dst entry attached to the SYNACK
* req: request_sock pointer
- * rvp: request_values pointer
*
* Allocate one skb and build a SYNACK packet.
* @dst is consumed : Caller should not use it again.
*/
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct request_values *rvp,
struct tcp_fastopen_cookie *foc)
{
struct tcp_out_options opts;
- struct tcp_extend_values *xvp = tcp_xv(rvp);
struct inet_request_sock *ireq = inet_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);
- const struct tcp_cookie_values *cvp = tp->cookie_values;
struct tcphdr *th;
struct sk_buff *skb;
struct tcp_md5sig_key *md5;
int tcp_header_size;
int mss;
- int s_data_desired = 0;
- if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
- s_data_desired = cvp->s_data_desired;
- skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
- sk_gfp_atomic(sk, GFP_ATOMIC));
+ skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
@@ -2747,9 +2707,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
else
#endif
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_header_size = tcp_synack_options(sk, req, mss,
- skb, &opts, &md5, xvp, foc)
- + sizeof(*th);
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
+ foc) + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -2767,40 +2726,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
TCPHDR_SYN | TCPHDR_ACK);
- if (OPTION_COOKIE_EXTENSION & opts.options) {
- if (s_data_desired) {
- u8 *buf = skb_put(skb, s_data_desired);
-
- /* copy data directly from the listening socket. */
- memcpy(buf, cvp->s_data_payload, s_data_desired);
- TCP_SKB_CB(skb)->end_seq += s_data_desired;
- }
-
- if (opts.hash_size > 0) {
- __u32 workspace[SHA_WORKSPACE_WORDS];
- u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
- u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
-
- /* Secret recipe depends on the Timestamp, (future)
- * Sequence and Acknowledgment Numbers, Initiator
- * Cookie, and others handled by IP variant caller.
- */
- *tail-- ^= opts.tsval;
- *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
- *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
-
- /* recommended */
- *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
- *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
-
- sha_transform((__u32 *)&xvp->cookie_bakery[0],
- (char *)mess,
- &workspace[0]);
- opts.hash_location =
- (__u8 *)&xvp->cookie_bakery[0];
- }
- }
-
th->seq = htonl(TCP_SKB_CB(skb)->seq);
/* XXX data is queued and acked as is. No buffer/window check */
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b78aac30c498..4b85e6f636c9 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- if (tp->early_retrans_delayed) {
- tcp_resume_early_retransmit(sk);
- return;
- }
if (tp->fastopen_rsk) {
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
@@ -360,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk)
WARN_ON(tcp_write_queue_empty(sk));
+ tp->tlp_high_seq = 0;
+
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
/* Receiver dastardly shrinks window. Our retransmits
@@ -418,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk)
NET_INC_STATS_BH(sock_net(sk), mib_idx);
}
- if (tcp_use_frto(sk)) {
- tcp_enter_frto(sk);
- } else {
- tcp_enter_loss(sk, 0);
- }
+ tcp_enter_loss(sk, 0);
if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
/* Retransmission failed because of local congestion,
@@ -495,13 +489,20 @@ void tcp_write_timer_handler(struct sock *sk)
}
event = icsk->icsk_pending;
- icsk->icsk_pending = 0;
switch (event) {
+ case ICSK_TIME_EARLY_RETRANS:
+ tcp_resume_early_retransmit(sk);
+ break;
+ case ICSK_TIME_LOSS_PROBE:
+ tcp_send_loss_probe(sk);
+ break;
case ICSK_TIME_RETRANS:
+ icsk->icsk_pending = 0;
tcp_retransmit_timer(sk);
break;
case ICSK_TIME_PROBE0:
+ icsk->icsk_pending = 0;
tcp_probe_timer(sk);
break;
}
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 1b91bf48e277..76a1e23259e1 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
break;
- case CA_EVENT_FRTO:
+ case CA_EVENT_LOSS:
tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
/* Update RTT_min when next ack arrives */
w->reset_rtt_min = 1;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0a073a263720..0bf5d399a03c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = inet->inet_saddr;
ipc.oif = sk->sk_bound_dev_if;
- err = sock_tx_timestamp(sk, &ipc.tx_flags);
- if (err)
- return err;
+
+ sock_tx_timestamp(sk, &ipc.tx_flags);
+
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc);
if (err)
@@ -1131,6 +1131,8 @@ static unsigned int first_packet_length(struct sock *sk)
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL &&
udp_lib_checksum_complete(skb)) {
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS,
+ IS_UDPLITE(sk));
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
IS_UDPLITE(sk));
atomic_inc(&sk->sk_drops);
@@ -1286,8 +1288,10 @@ out:
csum_copy_err:
slow = lock_sock_fast(sk);
- if (!skb_kill_datagram(sk, skb, flags))
+ if (!skb_kill_datagram(sk, skb, flags)) {
+ UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ }
unlock_sock_fast(sk, slow);
if (noblock)
@@ -1513,7 +1517,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (rcu_access_pointer(sk->sk_filter) &&
udp_lib_checksum_complete(skb))
- goto drop;
+ goto csum_error;
if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
@@ -1533,6 +1537,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return rc;
+csum_error:
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
@@ -1749,6 +1755,7 @@ csum_error:
proto == IPPROTO_UDPLITE ? "Lite" : "",
&saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
ulen);
+ UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
@@ -2093,7 +2100,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
int udp_seq_open(struct inode *inode, struct file *file)
{
- struct udp_seq_afinfo *afinfo = PDE(inode)->data;
+ struct udp_seq_afinfo *afinfo = PDE_DATA(inode);
struct udp_iter_state *s;
int err;
@@ -2279,31 +2286,91 @@ void __init udp_init(void)
int udp4_ufo_send_check(struct sk_buff *skb)
{
- const struct iphdr *iph;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(*uh)))
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
return -EINVAL;
- iph = ip_hdr(skb);
- uh = udp_hdr(skb);
+ if (likely(!skb->encapsulation)) {
+ const struct iphdr *iph;
+ struct udphdr *uh;
+
+ iph = ip_hdr(skb);
+ uh = udp_hdr(skb);
- uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
+ uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ }
return 0;
}
+static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ int mac_len = skb->mac_len;
+ int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+ __be16 protocol = skb->protocol;
+ netdev_features_t enc_features;
+ int outer_hlen;
+
+ if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
+ goto out;
+
+ skb->encapsulation = 0;
+ __skb_pull(skb, tnl_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, skb_inner_network_offset(skb));
+ skb->mac_len = skb_inner_network_offset(skb);
+ skb->protocol = htons(ETH_P_TEB);
+
+ /* segment inner packet. */
+ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ segs = skb_mac_gso_segment(skb, enc_features);
+ if (!segs || IS_ERR(segs))
+ goto out;
+
+ outer_hlen = skb_tnl_header_len(skb);
+ skb = segs;
+ do {
+ struct udphdr *uh;
+ int udp_offset = outer_hlen - tnl_hlen;
+
+ skb->mac_len = mac_len;
+
+ skb_push(skb, outer_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, mac_len);
+ skb_set_transport_header(skb, udp_offset);
+ uh = udp_hdr(skb);
+ uh->len = htons(skb->len - udp_offset);
+
+ /* csum segment if tunnel sets skb with csum. */
+ if (unlikely(uh->check)) {
+ struct iphdr *iph = ip_hdr(skb);
+
+ uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - udp_offset,
+ IPPROTO_UDP, 0);
+ uh->check = csum_fold(skb_checksum(skb, udp_offset,
+ skb->len - udp_offset, 0));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ }
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->protocol = protocol;
+ } while ((skb = skb->next));
+out:
+ return segs;
+}
+
struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
- int offset;
- __wsum csum;
-
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
goto out;
@@ -2313,6 +2380,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
int type = skb_shinfo(skb)->gso_type;
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+ SKB_GSO_UDP_TUNNEL |
SKB_GSO_GRE) ||
!(type & (SKB_GSO_UDP))))
goto out;
@@ -2323,20 +2391,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
- skb->ip_summed = CHECKSUM_NONE;
-
/* Fragment the skb. IP headers of the fragments are updated in
* inet_gso_segment()
*/
- segs = skb_segment(skb, features);
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+ segs = skb_udp_tunnel_segment(skb, features);
+ else {
+ int offset;
+ __wsum csum;
+
+ /* Do software UFO. Complete and fill in the UDP checksum as
+ * HW cannot do checksum of UDP packets sent as multiple
+ * IP fragments.
+ */
+ offset = skb_checksum_start_offset(skb);
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ segs = skb_segment(skb, features);
+ }
out:
return segs;
}
-
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 505b30ad9182..7927db0a9279 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -25,7 +25,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
return 0;
return inet_sk_diag_fill(sk, NULL, skb, req,
- sk_user_ns(NETLINK_CB(cb->skb).ssk),
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -64,14 +64,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
goto out;
err = -ENOMEM;
- rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) +
- 64)), GFP_KERNEL);
+ rep = nlmsg_new(sizeof(struct inet_diag_msg) +
+ sizeof(struct inet_diag_meminfo) + 64,
+ GFP_KERNEL);
if (!rep)
goto out;
err = inet_sk_diag_fill(sk, NULL, rep, req,
- sk_user_ns(NETLINK_CB(in_skb).ssk),
+ sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, nlh);
if (err < 0) {
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index fe5189e2e114..eb1dd4d643f2 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
- /* DS disclosed */
- top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+ /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
+ if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+ top_iph->tos = 0;
+ else
+ top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+ top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
XFRM_MODE_SKB_CB(skb)->tos);
flags = x->props.flags;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ed0b9e2e797a..11b13ea69db4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -156,6 +156,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
config IPV6_SIT
tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
select INET_TUNNEL
+ select NET_IP_TUNNEL
select IPV6_NDISC_NODETYPE
default y
---help---
@@ -201,6 +202,7 @@ config IPV6_TUNNEL
config IPV6_GRE
tristate "IPv6: GRE tunnel"
select IPV6_TUNNEL
+ select NET_IP_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 309af19a0a0a..9af088d2cdaa 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
-obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o
+obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dae802c0af7c..4ab4c38958c6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -70,6 +70,7 @@
#include <net/snmp.h>
#include <net/af_ieee802154.h>
+#include <net/firewire.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/ndisc.h>
@@ -419,6 +420,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_regen_rndid((unsigned long) ndev);
}
#endif
+ ndev->token = in6addr_any;
if (netif_running(dev) && addrconf_qdisc_ok(dev))
ndev->if_flags |= IF_READY;
@@ -542,8 +544,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
};
static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
- struct nlmsghdr *nlh,
- void *arg)
+ struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
@@ -603,6 +604,77 @@ errout:
return err;
}
+static int inet6_netconf_dump_devconf(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
+ int idx, s_idx;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct hlist_head *head;
+
+ s_h = cb->args[0];
+ s_idx = idx = cb->args[1];
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ rcu_read_lock();
+ cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
+ net->dev_base_seq;
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ goto cont;
+
+ if (inet6_netconf_fill_devconf(skb, dev->ifindex,
+ &idev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ -1) <= 0) {
+ rcu_read_unlock();
+ goto done;
+ }
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+ }
+ if (h == NETDEV_HASHENTRIES) {
+ if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ -1) <= 0)
+ goto done;
+ else
+ h++;
+ }
+ if (h == NETDEV_HASHENTRIES + 1) {
+ if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ -1) <= 0)
+ goto done;
+ else
+ h++;
+ }
+done:
+ cb->args[0] = h;
+ cb->args[1] = idx;
+
+ return skb->len;
+}
+
#ifdef CONFIG_SYSCTL
static void dev_forward_change(struct inet6_dev *idev)
{
@@ -804,6 +876,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa->prefix_len = pfxlen;
ifa->flags = flags | IFA_F_TENTATIVE;
ifa->cstamp = ifa->tstamp = jiffies;
+ ifa->tokenized = false;
ifa->rt = rt;
@@ -1414,7 +1487,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
}
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev, int strict)
+ const struct net_device *dev, int strict)
{
struct inet6_ifaddr *ifp;
unsigned int hash = inet6_addr_hash(addr);
@@ -1666,6 +1739,20 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
return 0;
}
+static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
+{
+ union fwnet_hwaddr *ha;
+
+ if (dev->addr_len != FWNET_ALEN)
+ return -1;
+
+ ha = (union fwnet_hwaddr *)dev->dev_addr;
+
+ memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
+ eui[0] ^= 2;
+ return 0;
+}
+
static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
{
/* XXX: inherit EUI-64 from other interface -- yoshfuji */
@@ -1730,6 +1817,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
return addrconf_ifid_gre(eui, dev);
case ARPHRD_IEEE802154:
return addrconf_ifid_eui64(eui, dev);
+ case ARPHRD_IEEE1394:
+ return addrconf_ifid_ieee1394(eui, dev);
}
return -1;
}
@@ -2044,11 +2133,19 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
struct inet6_ifaddr *ifp;
struct in6_addr addr;
int create = 0, update_lft = 0;
+ bool tokenized = false;
if (pinfo->prefix_len == 64) {
memcpy(&addr, &pinfo->prefix, 8);
- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+
+ if (!ipv6_addr_any(&in6_dev->token)) {
+ read_lock_bh(&in6_dev->lock);
+ memcpy(addr.s6_addr + 8,
+ in6_dev->token.s6_addr + 8, 8);
+ read_unlock_bh(&in6_dev->lock);
+ tokenized = true;
+ } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+ ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
in6_dev_put(in6_dev);
return;
}
@@ -2089,6 +2186,7 @@ ok:
update_lft = create = 1;
ifp->cstamp = jiffies;
+ ifp->tokenized = tokenized;
addrconf_dad_start(ifp);
}
@@ -2557,11 +2655,16 @@ static void init_loopback(struct net_device *dev)
if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
continue;
+ if (sp_ifa->rt)
+ continue;
+
sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0);
/* Failure cases are ignored */
- if (!IS_ERR(sp_rt))
+ if (!IS_ERR(sp_rt)) {
+ sp_ifa->rt = sp_rt;
ip6_ins_rt(sp_rt);
+ }
}
read_unlock_bh(&idev->lock);
}
@@ -2598,7 +2701,8 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_FDDI) &&
(dev->type != ARPHRD_ARCNET) &&
(dev->type != ARPHRD_INFINIBAND) &&
- (dev->type != ARPHRD_IEEE802154)) {
+ (dev->type != ARPHRD_IEEE802154) &&
+ (dev->type != ARPHRD_IEEE1394)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -3535,7 +3639,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
};
static int
-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
@@ -3601,7 +3705,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
}
static int
-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
@@ -3832,6 +3936,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
NLM_F_MULTI);
if (err <= 0)
break;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
}
break;
}
@@ -3889,6 +3994,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
s_ip_idx = ip_idx = cb->args[2];
rcu_read_lock();
+ cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
@@ -3940,8 +4046,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
return inet6_dump_addr(skb, cb, type);
}
-static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
- void *arg)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrmsg *ifm;
@@ -4074,7 +4179,8 @@ static inline size_t inet6_ifla6_size(void)
+ nla_total_size(sizeof(struct ifla_cacheinfo))
+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
- + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+ + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -4161,6 +4267,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
goto nla_put_failure;
snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+ nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
+ if (nla == NULL)
+ goto nla_put_failure;
+ read_lock_bh(&idev->lock);
+ memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
+ read_unlock_bh(&idev->lock);
+
return 0;
nla_put_failure:
@@ -4188,6 +4301,79 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
return 0;
}
+static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
+{
+ struct inet6_ifaddr *ifp;
+ struct net_device *dev = idev->dev;
+ bool update_rs = false;
+ struct in6_addr ll_addr;
+
+ if (token == NULL)
+ return -EINVAL;
+ if (ipv6_addr_any(token))
+ return -EINVAL;
+ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
+ return -EINVAL;
+ if (!ipv6_accept_ra(idev))
+ return -EINVAL;
+ if (idev->cnf.rtr_solicits <= 0)
+ return -EINVAL;
+
+ write_lock_bh(&idev->lock);
+
+ BUILD_BUG_ON(sizeof(token->s6_addr) != 16);
+ memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8);
+
+ write_unlock_bh(&idev->lock);
+
+ if (!idev->dead && (idev->if_flags & IF_READY) &&
+ !ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
+ IFA_F_OPTIMISTIC)) {
+
+ /* If we're not ready, then normal ifup will take care
+ * of this. Otherwise, we need to request our rs here.
+ */
+ ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters);
+ update_rs = true;
+ }
+
+ write_lock_bh(&idev->lock);
+
+ if (update_rs)
+ idev->if_flags |= IF_RS_SENT;
+
+ /* Well, that's kinda nasty ... */
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ spin_lock(&ifp->lock);
+ if (ifp->tokenized) {
+ ifp->valid_lft = 0;
+ ifp->prefered_lft = 0;
+ }
+ spin_unlock(&ifp->lock);
+ }
+
+ write_unlock_bh(&idev->lock);
+ return 0;
+}
+
+static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+ int err = -EINVAL;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ struct nlattr *tb[IFLA_INET6_MAX + 1];
+
+ if (!idev)
+ return -EAFNOSUPPORT;
+
+ if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+ BUG();
+
+ if (tb[IFLA_INET6_TOKEN])
+ err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+
+ return err;
+}
+
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
u32 portid, u32 seq, int event, unsigned int flags)
{
@@ -4366,6 +4552,8 @@ errout:
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
+ struct net *net = dev_net(ifp->idev->dev);
+
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
switch (event) {
@@ -4391,6 +4579,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
dst_free(&ifp->rt->dst);
break;
}
+ atomic_inc(&net->ipv6.dev_addr_genid);
}
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4871,6 +5060,7 @@ static struct rtnl_af_ops inet6_ops = {
.family = AF_INET6,
.fill_link_af = inet6_fill_link_af,
.get_link_af_size = inet6_get_link_af_size,
+ .set_link_af = inet6_set_link_af,
};
/*
@@ -4943,7 +5133,7 @@ int __init addrconf_init(void)
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
inet6_dump_ifacaddr, NULL);
__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- NULL, NULL);
+ inet6_netconf_dump_devconf, NULL);
ipv6_addr_label_rtnl_register();
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index aad64352cb60..f083a583a05c 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
[IFAL_LABEL] = { .len = sizeof(u32), },
};
-static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
- void *arg)
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ifaddrlblmsg *ifal;
@@ -436,10 +435,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!tb[IFAL_ADDRESS])
return -EINVAL;
-
pfx = nla_data(tb[IFAL_ADDRESS]);
- if (!pfx)
- return -EINVAL;
if (!tb[IFAL_LABEL])
return -EINVAL;
@@ -533,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
- void *arg)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -561,10 +556,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
if (!tb[IFAL_ADDRESS])
return -EINVAL;
-
addr = nla_data(tb[IFAL_ADDRESS]);
- if (!addr)
- return -EINVAL;
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6b793bfc0e10..ab5c7ad482cd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,7 +49,6 @@
#include <net/udp.h>
#include <net/udplite.h>
#include <net/tcp.h>
-#include <net/ipip.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/route.h>
@@ -323,7 +322,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net_device *dev = NULL;
rcu_read_lock();
- if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
addr->sin6_scope_id) {
/* Override any existing binding, if another one
@@ -471,8 +470,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_port = inet->inet_sport;
}
- if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin->sin6_scope_id = sk->sk_bound_dev_if;
+ sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
+ sk->sk_bound_dev_if);
*uaddr_len = sizeof(*sin);
return 0;
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f5a54782a340..4b56cbbc7890 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -124,7 +124,7 @@ ipv4_connected:
goto out;
}
- if (addr_type&IPV6_ADDR_LINKLOCAL) {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
usin->sin6_scope_id) {
if (sk->sk_bound_dev_if &&
@@ -355,18 +355,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = serr->port;
- sin->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IPV6)) {
const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
struct ipv6hdr, daddr);
sin->sin6_addr = ip6h->daddr;
if (np->sndflow)
sin->sin6_flowinfo = ip6_flowinfo(ip6h);
- if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin->sin6_scope_id = IP6CB(skb)->iif;
+ sin->sin6_scope_id =
+ ipv6_iface_scope_id(&sin->sin6_addr,
+ IP6CB(skb)->iif);
} else {
ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
&sin->sin6_addr);
+ sin->sin6_scope_id = 0;
}
}
@@ -376,18 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
- sin->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IPV6)) {
sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
ip6_datagram_recv_ctl(sk, msg, skb);
- if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin->sin6_scope_id = IP6CB(skb)->iif;
+ sin->sin6_scope_id =
+ ipv6_iface_scope_id(&sin->sin6_addr,
+ IP6CB(skb)->iif);
} else {
struct inet_sock *inet = inet_sk(sk);
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin->sin6_addr);
+ sin->sin6_scope_id = 0;
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
}
@@ -592,7 +594,9 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
sin6.sin6_addr = ipv6_hdr(skb)->daddr;
sin6.sin6_port = ports[1];
sin6.sin6_flowinfo = 0;
- sin6.sin6_scope_id = 0;
+ sin6.sin6_scope_id =
+ ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr,
+ opt->iif);
put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index fff5bdd8b680..b4ff0a42b8c7 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -124,15 +124,6 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
}
/*
- * Slightly more convenient version of icmpv6_send.
- */
-void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
-{
- icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
- kfree_skb(skb);
-}
-
-/*
* Figure out, may we reply to this packet with icmp error.
*
* We do not reply, if:
@@ -332,7 +323,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk
* anycast.
*/
if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+ LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
dst_release(dst);
return ERR_PTR(-EINVAL);
}
@@ -381,7 +372,7 @@ relookup_failed:
/*
* Send an ICMP message in response to a packet in error
*/
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
{
struct net *net = dev_net(skb->dev);
struct inet6_dev *idev = NULL;
@@ -406,7 +397,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
/*
* Make sure we respect the rules
* i.e. RFC 1885 2.4(e)
- * Rule (e.1) is enforced by not using icmpv6_send
+ * Rule (e.1) is enforced by not using icmp6_send
* in any code that processes icmp errors.
*/
addr_type = ipv6_addr_type(&hdr->daddr);
@@ -434,7 +425,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* Source addr check
*/
- if (addr_type & IPV6_ADDR_LINKLOCAL)
+ if (__ipv6_addr_needs_scope_id(addr_type))
iif = skb->dev->ifindex;
/*
@@ -444,7 +435,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* and anycast addresses will be checked later.
*/
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
+ LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
return;
}
@@ -452,7 +443,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* Never answer to a ICMP packet.
*/
if (is_ineligible(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
+ LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
return;
}
@@ -529,7 +520,14 @@ out_dst_release:
out:
icmpv6_xmit_unlock(sk);
}
-EXPORT_SYMBOL(icmpv6_send);
+
+/* Slightly more convenient version of icmp6_send.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+ icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
+ kfree_skb(skb);
+}
static void icmpv6_echo_reply(struct sk_buff *skb)
{
@@ -701,7 +699,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (__skb_checksum_complete(skb)) {
LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
saddr, daddr);
- goto discard_it;
+ goto csum_error;
}
}
@@ -787,6 +785,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
kfree_skb(skb);
return 0;
+csum_error:
+ ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
@@ -885,8 +885,14 @@ int __init icmpv6_init(void)
err = -EAGAIN;
if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
goto fail;
+
+ err = inet6_register_icmp_sender(icmp6_send);
+ if (err)
+ goto sender_reg_err;
return 0;
+sender_reg_err:
+ inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
pr_err("Failed to register ICMP6 protocol\n");
unregister_pernet_subsys(&icmpv6_sk_ops);
@@ -895,6 +901,7 @@ fail:
void icmpv6_cleanup(void)
{
+ inet6_unregister_icmp_sender(icmp6_send);
unregister_pernet_subsys(&icmpv6_sk_ops);
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 9bfab19ff3c0..e4311cbc8b4e 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,6 +54,10 @@ int inet6_csk_bind_conflict(const struct sock *sk,
if (ipv6_rcv_saddr_equal(sk, sk2))
break;
}
+ if (!relax && reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN &&
+ ipv6_rcv_saddr_equal(sk, sk2))
+ break;
}
}
@@ -169,10 +173,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
sin6->sin6_port = inet_sk(sk)->inet_dport;
/* We do not store received flowlabel for TCP */
sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (sk->sk_bound_dev_if &&
- ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = sk->sk_bound_dev_if;
+ sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+ sk->sk_bound_dev_if);
}
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b973ed3d06cf..46e88433ec7d 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy)
spin_lock(&ip6_fl_lock);
for (i=0; i<=FL_HASH_MASK; i++) {
- struct ip6_flowlabel *fl, **flp;
+ struct ip6_flowlabel *fl;
+ struct ip6_flowlabel __rcu **flp;
+
flp = &fl_ht[i];
while ((fl = rcu_dereference_protected(*flp,
lockdep_is_held(&ip6_fl_lock))) != NULL) {
@@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net)
spin_lock(&ip6_fl_lock);
for (i = 0; i <= FL_HASH_MASK; i++) {
- struct ip6_flowlabel *fl, **flp;
+ struct ip6_flowlabel *fl;
+ struct ip6_flowlabel __rcu **flp;
+
flp = &fl_ht[i];
while ((fl = rcu_dereference_protected(*flp,
lockdep_is_held(&ip6_fl_lock))) != NULL) {
@@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_flowlabel_req freq;
struct ipv6_fl_socklist *sfl1=NULL;
- struct ipv6_fl_socklist *sfl, **sflp;
+ struct ipv6_fl_socklist *sfl;
+ struct ipv6_fl_socklist __rcu **sflp;
struct ip6_flowlabel *fl, *fl1 = NULL;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e4efffe2522e..ecd60733e5e2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -38,6 +38,7 @@
#include <net/sock.h>
#include <net/ip.h>
+#include <net/ip_tunnels.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/addrconf.h>
@@ -110,46 +111,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]
-static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
-
- tot->multicast = dev->stats.multicast;
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- return tot;
-}
-
/* Given src, dst and key, find appropriate for input tunnel. */
static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
@@ -667,7 +628,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct net_device_stats *stats = &tunnel->dev->stats;
int err = -1;
u8 proto;
- int pkt_len;
struct sk_buff *new_skb;
if (dev->type == ARPHRD_ETHER)
@@ -801,23 +761,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
}
- nf_reset(skb);
- pkt_len = skb->len;
- err = ip6_local_out(skb);
-
- if (net_xmit_eval(err) == 0) {
- struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
-
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
- } else {
- stats->tx_errors++;
- stats->tx_aborted_errors++;
- }
-
+ ip6tunnel_xmit(skb, dev);
if (ndst)
ip6_tnl_dst_store(tunnel, ndst);
-
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
@@ -1135,6 +1081,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
if (t == NULL)
t = netdev_priv(dev);
+ memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
@@ -1182,6 +1129,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
if (t) {
err = 0;
+ memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
@@ -1271,7 +1219,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_do_ioctl = ip6gre_tunnel_ioctl,
.ndo_change_mtu = ip6gre_tunnel_change_mtu,
- .ndo_get_stats64 = ip6gre_get_stats64,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void ip6gre_dev_free(struct net_device *dev)
@@ -1520,7 +1468,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6gre_tunnel_change_mtu,
- .ndo_get_stats64 = ip6gre_get_stats64,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void ip6gre_tap_setup(struct net_device *dev)
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
new file mode 100644
index 000000000000..4578e23834f7
--- /dev/null
+++ b/net/ipv6/ip6_icmp.c
@@ -0,0 +1,47 @@
+#include <linux/export.h>
+#include <linux/icmpv6.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#include <net/ipv6.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static ip6_icmp_send_t __rcu *ip6_icmp_send;
+
+int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
+{
+ return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ?
+ 0 : -EBUSY;
+}
+EXPORT_SYMBOL(inet6_register_icmp_sender);
+
+int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
+{
+ int ret;
+
+ ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ?
+ 0 : -EINVAL;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(inet6_unregister_icmp_sender);
+
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+{
+ ip6_icmp_send_t *send;
+
+ rcu_read_lock();
+ send = rcu_dereference(ip6_icmp_send);
+
+ if (!send)
+ goto out;
+ send(skb, type, code, info);
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(icmpv6_send);
+#endif
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 8234c1dcdf72..71b766ee821d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -92,14 +92,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
u8 *prevhdr;
int offset = 0;
- if (!(features & NETIF_F_V6_CSUM))
- features &= ~NETIF_F_SG;
-
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_UDP_TUNNEL |
SKB_GSO_TCPV6 |
0)))
goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 155eccfa7760..d5d20cde8d92 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -381,9 +381,8 @@ int ip6_forward(struct sk_buff *skb)
* cannot be fragmented, because there is no warranty
* that different fragments will go along one path. --ANK
*/
- if (opt->ra) {
- u8 *ptr = skb_network_header(skb) + opt->ra;
- if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
+ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
+ if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
return 0;
}
@@ -822,11 +821,17 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
const struct flowi6 *fl6)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt;
if (!dst)
goto out;
+ if (dst->ops->family != AF_INET6) {
+ dst_release(dst);
+ return NULL;
+ }
+
+ rt = (struct rt6_info *)dst;
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
@@ -1147,7 +1152,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
if (WARN_ON(np->cork.opt))
return -EINVAL;
- np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
+ np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
if (unlikely(np->cork.opt == NULL))
return -ENOBUFS;
@@ -1224,11 +1229,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
}
/* For UDP, check if TX timestamp is enabled */
- if (sk->sk_type == SOCK_DGRAM) {
- err = sock_tx_timestamp(sk, &tx_flags);
- if (err)
- goto error;
- }
+ if (sk->sk_type == SOCK_DGRAM)
+ sock_tx_timestamp(sk, &tx_flags);
/*
* Let's try using as much space as possible.
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index fff83cbc197f..1e55866cead7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -47,6 +47,7 @@
#include <net/icmp.h>
#include <net/ip.h>
+#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
@@ -955,7 +956,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
unsigned int max_headroom = sizeof(struct ipv6hdr);
u8 proto;
int err = -1;
- int pkt_len;
if (!fl6->flowi6_mark)
dst = ip6_tnl_dst_check(t);
@@ -1035,19 +1035,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
- nf_reset(skb);
- pkt_len = skb->len;
- err = ip6_local_out(skb);
-
- if (net_xmit_eval(err) == 0) {
- struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
-
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
- } else {
- stats->tx_errors++;
- stats->tx_aborted_errors++;
- }
+ ip6tunnel_xmit(skb, dev);
if (ndst)
ip6_tnl_dst_store(t, ndst);
return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 96bfb4e4b820..241fb8ad9fcf 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -842,9 +842,9 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
nlh->nlmsg_type = NLMSG_ERROR;
- nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
- ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
+ ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
kfree_skb(skb);
@@ -1100,13 +1100,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+ if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
- nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+ nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
- ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+ ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
}
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 76ef4353d518..ca4ffcc287f1 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -610,8 +610,6 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
}
}
#endif
- if (!dev->addr_len)
- send_sllao = 0;
if (send_sllao)
optlen += ndisc_opt_addr_space(dev);
@@ -1495,7 +1493,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
*/
if (ha)
- ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha);
+ ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
/*
* build redirect option and copy skb over to the new packet.
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 429089cb073d..95f3f1da0d7f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -1,9 +1,16 @@
+/*
+ * IPv6 specific functions of netfilter core
+ *
+ * Rusty Russell (C) 2000 -- This code is GPL.
+ * Patrick McHardy (C) 2006-2012
+ */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
+#include <net/addrconf.h>
#include <net/dst.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -29,7 +36,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
- return -EINVAL;
+ return dst->error;
}
/* Drop old route. */
@@ -43,7 +50,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
if (IS_ERR(dst))
- return -1;
+ return PTR_ERR(dst);
skb_dst_set(skb, dst);
}
#endif
@@ -53,7 +60,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
if (skb_headroom(skb) < hh_len &&
pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
0, GFP_ATOMIC))
- return -1;
+ return -ENOMEM;
return 0;
}
@@ -180,6 +187,10 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
return csum;
};
+static const struct nf_ipv6_ops ipv6ops = {
+ .chk_addr = ipv6_chk_addr,
+};
+
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
.checksum = nf_ip6_checksum,
@@ -192,6 +203,7 @@ static const struct nf_afinfo nf_ip6_afinfo = {
int __init ipv6_netfilter_init(void)
{
+ RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
return nf_register_afinfo(&nf_ip6_afinfo);
}
@@ -200,5 +212,6 @@ int __init ipv6_netfilter_init(void)
*/
void ipv6_netfilter_fini(void)
{
+ RCU_INIT_POINTER(nf_ipv6_ops, NULL);
nf_unregister_afinfo(&nf_ip6_afinfo);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index c72532a60d88..4433ab40e7de 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -105,7 +105,7 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED
+ depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 341b54ade72c..44400c216dc6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -284,6 +285,7 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ip6t_entry *iter;
unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
@@ -296,7 +298,7 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
+ nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
"TRACE: %s:%s:%s:%u ",
tablename, chainname, comment, rulenum);
}
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index cb631143721c..590f767db5d4 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -18,9 +18,8 @@
static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
{
struct ip6t_npt_tginfo *npt = par->targinfo;
- __wsum src_sum = 0, dst_sum = 0;
struct in6_addr pfx;
- unsigned int i;
+ __wsum src_sum, dst_sum;
if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
return -EINVAL;
@@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
- src_sum = csum_add(src_sum,
- (__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
- dst_sum = csum_add(dst_sum,
- (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
- }
+ src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
+ dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
return 0;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index ed3b427b2841..70f9abc0efe9 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -7,6 +7,8 @@
* Authors:
* Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
*
+ * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net>
+ *
* Based on net/ipv4/netfilter/ipt_REJECT.c
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 6134a1ebfb1b..e075399d8b72 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -38,7 +38,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
-
+ int err;
#if 0
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
@@ -65,8 +65,11 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
- flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
- return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+ err = ip6_route_me_harder(skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
return ret;
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index e0e788d25b14..6383f90efda8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -179,6 +179,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
+ int err;
#endif
unsigned int ret;
@@ -197,9 +198,11 @@ nf_nat_ipv6_out(unsigned int hooknum,
&ct->tuplehash[!dir].tuple.dst.u3) ||
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all))
- if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
- ret = NF_DROP;
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
}
#endif
return ret;
@@ -215,6 +218,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int ret;
+ int err;
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct ipv6hdr))
@@ -227,16 +231,19 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
- if (ip6_route_me_harder(skb))
- ret = NF_DROP;
+ err = ip6_route_me_harder(skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
}
#ifdef CONFIG_XFRM
else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (nf_xfrm_me_harder(skb, AF_INET6))
- ret = NF_DROP;
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
#endif
}
return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 2b6c226f5198..c9b6a6e6a1e8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -204,7 +204,7 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
if (ct != NULL && !nf_ct_is_untracked(ct)) {
help = nfct_help(ct);
if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
- nf_conntrack_get_reasm(skb);
+ nf_conntrack_get_reasm(reasm);
NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
(struct net_device *)in,
(struct net_device *)out,
@@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
sizeof(sin6.sin6_addr));
nf_ct_put(ct);
-
- if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6.sin6_scope_id = sk->sk_bound_dev_if;
- else
- sin6.sin6_scope_id = 0;
-
+ sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
+ sk->sk_bound_dev_if);
return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 24df3dde0076..b3807c5cb888 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
+ NULL, NULL,
"nf_ct_icmpv6: invalid new with type %d ",
type + 128);
return false;
@@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: short packet ");
return -NF_ACCEPT;
}
@@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: ICMPv6 checksum failed ");
return -NF_ACCEPT;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6700069949dd..dffdc1a389c5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -41,6 +41,7 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
+#include <net/inet_ecn.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <linux/sysctl.h>
#include <linux/netfilter.h>
@@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
}
#endif
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+{
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+}
+
static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
const struct frag_queue *nq;
@@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data)
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
- struct in6_addr *dst)
+ struct in6_addr *dst, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.ecn = ecn;
read_lock_bh(&nf_frags.lock);
hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
@@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
struct sk_buff *prev, *next;
unsigned int payload_len;
int offset, end;
+ u8 ecn;
if (fq->q.last_in & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n");
@@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
return -1;
}
+ ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
if (skb->ip_summed == CHECKSUM_COMPLETE) {
const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
@@ -317,6 +327,7 @@ found:
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
+ fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
add_frag_mem_limit(&fq->q, skb->truesize);
@@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
{
struct sk_buff *fp, *op, *head = fq->q.fragments;
int payload_len;
+ u8 ecn;
inet_frag_kill(&fq->q, &nf_frags);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+ goto out_fail;
+
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
sizeof(struct ipv6hdr) + fq->q.len -
@@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
@@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
local_bh_enable();
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+ fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
+ ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index bbbe53a99b57..51c3285b5d9b 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -90,6 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+ /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
SNMP_MIB_SENTINEL
};
@@ -99,6 +100,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
+ SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -129,6 +131,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {
SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+ SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -139,6 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+ SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
SNMP_MIB_SENTINEL
};
@@ -247,7 +251,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, snmp6_dev_seq_show, PDE(inode)->data);
+ return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode));
}
static const struct file_operations snmp6_dev_seq_fops = {
@@ -287,8 +291,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
return -ENOENT;
if (!idev->stats.proc_dir_entry)
return -EINVAL;
- remove_proc_entry(idev->stats.proc_dir_entry->name,
- net->mib.proc_net_devsnmp6);
+ proc_remove(idev->stats.proc_dir_entry);
idev->stats.proc_dir_entry = NULL;
return 0;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 330b5e7b7df6..eedff8ccded5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -263,7 +263,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
- if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
addr->sin6_scope_id) {
/* Override any existing binding, if another
@@ -498,9 +498,8 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sin6->sin6_port = 0;
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = IP6CB(skb)->iif;
+ sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+ IP6CB(skb)->iif);
}
sock_recv_ts_and_drops(msg, sk, skb);
@@ -802,7 +801,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
- ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+ __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
fl6.flowi6_oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0ba10e53a629..790d9f4b8b0b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -58,6 +58,7 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/inet_ecn.h>
struct ip6frag_skb_cb
{
@@ -67,6 +68,10 @@ struct ip6frag_skb_cb
#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+{
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+}
static struct inet_frags ip6_frags;
@@ -119,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
fq->user = arg->user;
fq->saddr = *arg->src;
fq->daddr = *arg->dst;
+ fq->ecn = arg->ecn;
}
EXPORT_SYMBOL(ip6_frag_init);
@@ -173,7 +179,8 @@ static void ip6_frag_expire(unsigned long data)
}
static __inline__ struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst)
+fq_find(struct net *net, __be32 id, const struct in6_addr *src,
+ const struct in6_addr *dst, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -183,6 +190,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6
arg.user = IP6_DEFRAG_LOCAL_DELIVER;
arg.src = src;
arg.dst = dst;
+ arg.ecn = ecn;
read_lock(&ip6_frags.lock);
hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
@@ -202,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct net_device *dev;
int offset, end;
struct net *net = dev_net(skb_dst(skb)->dev);
+ u8 ecn;
if (fq->q.last_in & INET_FRAG_COMPLETE)
goto err;
@@ -219,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
return -1;
}
+ ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
if (skb->ip_summed == CHECKSUM_COMPLETE) {
const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
@@ -319,6 +330,7 @@ found:
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
+ fq->ecn |= ecn;
add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
@@ -370,9 +382,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
int payload_len;
unsigned int nhoff;
int sum_truesize;
+ u8 ecn;
inet_frag_kill(&fq->q, &ip6_frags);
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+ goto out_fail;
+
/* Make the one we just received the head. */
if (prev) {
head = prev->next;
@@ -471,6 +488,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
/* Yes, and fold redundant checksum back. 8) */
@@ -534,7 +552,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS, evicted);
- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
+ fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
+ ip6_frag_ecn(hdr));
if (fq != NULL) {
int ret;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e5fe0041adfa..ad0aa6b0b86a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2355,7 +2355,7 @@ beginning:
return last_err;
}
-static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
{
struct fib6_config cfg;
int err;
@@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
return ip6_route_del(&cfg);
}
-static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
{
struct fib6_config cfg;
int err;
@@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
prefix, 0, NLM_F_MULTI);
}
-static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 02f96dcbcf02..335363478bbf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -49,7 +49,7 @@
#include <net/ip.h>
#include <net/udp.h>
#include <net/icmp.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
@@ -87,41 +87,6 @@ struct sit_net {
struct net_device *fb_tunnel_dev;
};
-static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
-
- tot->rx_errors = dev->stats.rx_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- return tot;
-}
-
/*
* Must be invoked with rcu_read_lock
*/
@@ -899,6 +864,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if ((iph->ttl = tiph->ttl) == 0)
iph->ttl = iph6->hop_limit;
+ skb->ip_summed = CHECKSUM_NONE;
+ ip_select_ident(iph, skb_dst(skb), NULL);
iptunnel_xmit(skb, dev);
return NETDEV_TX_OK;
@@ -1200,7 +1167,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_start_xmit = ipip6_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
.ndo_change_mtu = ipip6_tunnel_change_mtu,
- .ndo_get_stats64= ipip6_get_stats64,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void ipip6_dev_free(struct net_device *dev)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8a0848b60b35..d5dda20bd717 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
- const u8 *hash_location;
struct inet_request_sock *ireq;
struct inet6_request_sock *ireq6;
struct tcp_request_sock *treq;
@@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
+ tcp_parse_options(skb, &tcp_opt, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 46a5be85be87..0a17ed9eaf39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -462,7 +462,6 @@ out:
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
struct flowi6 *fl6,
struct request_sock *req,
- struct request_values *rvp,
u16 queue_mapping)
{
struct inet6_request_sock *treq = inet6_rsk(req);
@@ -474,7 +473,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+ skb = tcp_make_synack(sk, dst, req, NULL);
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -489,13 +488,12 @@ done:
return err;
}
-static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
- struct request_values *rvp)
+static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
{
struct flowi6 fl6;
int res;
- res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+ res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
if (!res)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
return res;
@@ -948,9 +946,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
*/
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_extend_values tmp_ext;
struct tcp_options_received tmp_opt;
- const u8 *hash_location;
struct request_sock *req;
struct inet6_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -988,50 +984,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
-
- if (tmp_opt.cookie_plus > 0 &&
- tmp_opt.saw_tstamp &&
- !tp->rx_opt.cookie_out_never &&
- (sysctl_tcp_cookie_size > 0 ||
- (tp->cookie_values != NULL &&
- tp->cookie_values->cookie_desired > 0))) {
- u8 *c;
- u32 *d;
- u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
- int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
-
- if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
- goto drop_and_free;
-
- /* Secret recipe starts with IP addresses */
- d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
- *mess++ ^= *d++;
-
- /* plus variable length Initiator Cookie */
- c = (u8 *)mess;
- while (l-- > 0)
- *c++ ^= *hash_location++;
-
- want_cookie = false; /* not our kind of cookie */
- tmp_ext.cookie_out_never = 0; /* false */
- tmp_ext.cookie_plus = tmp_opt.cookie_plus;
- } else if (!tp->rx_opt.cookie_in_always) {
- /* redundant indications, but ensure initialization. */
- tmp_ext.cookie_out_never = 1; /* true */
- tmp_ext.cookie_plus = 0;
- } else {
- goto drop_and_free;
- }
- tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+ tcp_parse_options(skb, &tmp_opt, 0, NULL);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1109,7 +1062,6 @@ have_isn:
goto drop_and_release;
if (tcp_v6_send_synack(sk, dst, &fl6, req,
- (struct request_values *)&tmp_ext,
skb_get_queue_mapping(skb)) ||
want_cookie)
goto drop_and_free;
@@ -1453,6 +1405,7 @@ discard:
kfree_skb(skb);
return 0;
csum_err:
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1514,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
goto discard_it;
if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
- goto bad_packet;
+ goto csum_error;
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
@@ -1578,6 +1531,8 @@ no_tcp_socket:
goto discard_it;
if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+csum_error:
+ TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1585,11 +1540,6 @@ bad_packet:
}
discard_it:
-
- /*
- * Discard frame
- */
-
kfree_skb(skb);
return 0;
@@ -1603,10 +1553,13 @@ do_time_wait:
goto discard_it;
}
- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ if (skb->len < (th->doff<<2)) {
inet_twsk_put(inet_twsk(sk));
- goto discard_it;
+ goto bad_packet;
+ }
+ if (tcp_checksum_complete(skb)) {
+ inet_twsk_put(inet_twsk(sk));
+ goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
@@ -1937,6 +1890,17 @@ void tcp6_proc_exit(struct net *net)
}
#endif
+static void tcp_v6_clear_sk(struct sock *sk, int size)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* we do not want to clear pinet6 field, because of RCU lookups */
+ sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+ size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
+ memset(&inet->pinet6 + 1, 0, size);
+}
+
struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
@@ -1980,6 +1944,7 @@ struct proto tcpv6_prot = {
#ifdef CONFIG_MEMCG_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
+ .clear_sk = tcp_v6_clear_sk,
};
static const struct inet6_protocol tcpv6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d8e5e852fc7a..42923b14dfa6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -450,15 +450,16 @@ try_again:
sin6->sin6_family = AF_INET6;
sin6->sin6_port = udp_hdr(skb)->source;
sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (is_udp4)
+ if (is_udp4) {
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin6->sin6_addr);
- else {
+ sin6->sin6_scope_id = 0;
+ } else {
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
- if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = IP6CB(skb)->iif;
+ sin6->sin6_scope_id =
+ ipv6_iface_scope_id(&sin6->sin6_addr,
+ IP6CB(skb)->iif);
}
}
@@ -482,12 +483,17 @@ out:
csum_copy_err:
slow = lock_sock_fast(sk);
if (!skb_kill_datagram(sk, skb, flags)) {
- if (is_udp4)
+ if (is_udp4) {
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
- else
+ } else {
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
+ }
}
unlock_sock_fast(sk, slow);
@@ -636,7 +642,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (rcu_access_pointer(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
- goto drop;
+ goto csum_error;
}
if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
@@ -655,6 +661,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_unlock_sock(sk);
return rc;
+csum_error:
+ UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
@@ -816,7 +824,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
}
if (udp6_csum_init(skb, uh, proto))
- goto discard;
+ goto csum_error;
/*
* Multicast receive code
@@ -849,7 +857,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto discard;
if (udp_lib_checksum_complete(skb))
- goto discard;
+ goto csum_error;
UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
@@ -866,7 +874,9 @@ short_packet:
skb->len,
daddr,
ntohs(uh->dest));
-
+ goto discard;
+csum_error:
+ UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
@@ -1118,7 +1128,7 @@ do_udp_sendmsg:
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
- ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+ __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
fl6.flowi6_oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
@@ -1422,6 +1432,17 @@ void udp6_proc_exit(struct net *net) {
}
#endif /* CONFIG_PROC_FS */
+void udp_v6_clear_sk(struct sock *sk, int size)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* we do not want to clear pinet6 field, because of RCU lookups */
+ sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+ size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
+ memset(&inet->pinet6 + 1, 0, size);
+}
+
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
@@ -1452,7 +1473,7 @@ struct proto udpv6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = sk_prot_clear_portaddr_nulls,
+ .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index d7571046bfc4..4691ed50a928 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -31,6 +31,8 @@ extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
extern void udpv6_destroy_sock(struct sock *sk);
+extern void udp_v6_clear_sk(struct sock *sk, int size);
+
#ifdef CONFIG_PROC_FS
extern int udp6_seq_show(struct seq_file *seq, void *v);
#endif
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index cf05cf073c51..d3cfaf9c7a08 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
+ /* UDP Tunnel offload on ipv6 is not yet supported. */
+ if (skb->encapsulation)
+ return -EINVAL;
+
if (!pskb_may_pull(skb, sizeof(*uh)))
return -EINVAL;
@@ -42,11 +46,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
unsigned int mss;
unsigned int unfrag_ip6hlen, unfrag_len;
struct frag_hdr *fptr;
- u8 *mac_start, *prevhdr;
+ u8 *packet_start, *prevhdr;
u8 nexthdr;
u8 frag_hdr_sz = sizeof(struct frag_hdr);
int offset;
__wsum csum;
+ int tnl_hlen;
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
@@ -56,7 +61,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
/* Packet is from an untrusted source, reset gso_segs. */
int type = skb_shinfo(skb)->gso_type;
- if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+ if (unlikely(type & ~(SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_UDP_TUNNEL |
SKB_GSO_GRE) ||
!(type & (SKB_GSO_UDP))))
goto out;
@@ -77,9 +84,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
skb->ip_summed = CHECKSUM_NONE;
/* Check if there is enough headroom to insert fragment header. */
- if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
- pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
- goto out;
+ tnl_hlen = skb_tnl_header_len(skb);
+ if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
+ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+ goto out;
+ }
/* Find the unfragmentable header and shift it left by frag_hdr_sz
* bytes to insert fragment header.
@@ -87,11 +96,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
*prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
- unfrag_ip6hlen;
- mac_start = skb_mac_header(skb);
- memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
+ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+ unfrag_ip6hlen + tnl_hlen;
+ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
+ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
skb->mac_header -= frag_hdr_sz;
skb->network_header -= frag_hdr_sz;
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 1d08e21d9f69..dfcc4be46898 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -56,7 +56,7 @@ struct proto udplitev6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = sk_prot_clear_portaddr_nulls,
+ .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udplite6_protosw = {
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9bf6a74a71d2..4770d515c2c8 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
sizeof(top_iph->flow_lbl));
top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
- dsfield = XFRM_MODE_SKB_CB(skb)->tos;
- dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+ if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+ dsfield = 0;
+ else
+ dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+ dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4ef7bdb65440..23ed03d786c8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -103,8 +103,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
dev_hold(dev);
xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
- if (!xdst->u.rt6.rt6i_idev)
+ if (!xdst->u.rt6.rt6i_idev) {
+ dev_put(dev);
return -ENODEV;
+ }
rt6_transfer_peer(&xdst->u.rt6, rt);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index e493b3397ae3..0578d4fa00a9 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self)
IRDA_DEBUG(2, "%s()\n", __func__);
- skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
- GFP_ATOMIC);
+ skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL);
if (skb == NULL) {
IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n",
__func__);
@@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
}
/* Allocate networking socket */
- sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto);
+ sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto);
if (sk == NULL)
return -ENOMEM;
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 52079f19bbbe..b797daac063c 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
IRDA_ASSERT(ircomm != NULL, return NULL;);
- self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
+ self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL);
if (self == NULL)
return NULL;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 362ba47968e4..41ac7938268b 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -328,7 +328,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
spin_unlock_irqrestore(&port->lock, flags);
while (1) {
- if (tty->termios.c_cflag & CBAUD)
+ if (C_BAUD(tty) && test_bit(ASYNCB_INITIALIZED, &port->flags))
tty_port_raise_dtr_rts(port);
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
index edab393e0c82..a2a508f5f268 100644
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ b/net/irda/ircomm/ircomm_tty_attach.c
@@ -997,12 +997,8 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self,
self->settings.dce = IRCOMM_DELTA_CD;
ircomm_tty_check_modem_status(self);
} else {
- struct tty_struct *tty = tty_port_tty_get(&self->port);
IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ );
- if (tty) {
- tty_hangup(tty);
- tty_kref_put(tty);
- }
+ tty_port_tty_hangup(&self->port, false);
}
break;
default:
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 8c004161a843..9ea0c933b9ff 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -544,7 +544,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
/*
* We now have some discovery info to deliver!
*/
- discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC);
+ discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC);
if (!discovery) {
IRDA_WARNING("%s: unable to malloc!\n", __func__);
return;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 1064621da6f6..98ad6ec4bd3c 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -58,7 +58,7 @@ int sysctl_discovery_slots = 6; /* 6 slots by default */
int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;
char sysctl_devname[65];
-const char *irlmp_reasons[] = {
+static const char *irlmp_reasons[] = {
"ERROR, NOT USED",
"LM_USER_REQUEST",
"LM_LAP_DISCONNECT",
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 206ce6db2c36..ae691651b721 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
return iucv_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5b1e5af25713..9da862070dd8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1710,6 +1710,7 @@ static int key_notify_sa_flush(const struct km_event *c)
hdr->sadb_msg_version = PF_KEY_V2;
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+ hdr->sadb_msg_reserved = 0;
pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
@@ -2366,6 +2367,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
out:
xfrm_pol_put(xp);
+ if (err == 0)
+ xfrm_garbage_collect(net);
return err;
}
@@ -2615,6 +2618,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
out:
xfrm_pol_put(xp);
+ if (delete && err == 0)
+ xfrm_garbage_collect(net);
return err;
}
@@ -2695,6 +2700,7 @@ static int key_notify_policy_flush(const struct km_event *c)
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+ hdr->sadb_msg_reserved = 0;
pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
return 0;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8aecf5df6656..6984c3a353cd 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1777,7 +1777,7 @@ int l2tp_session_delete(struct l2tp_session *session)
if (session->session_close != NULL)
(*session->session_close)(session);
if (session->deref)
- (*session->ref)(session);
+ (*session->deref)(session);
l2tp_session_dec_refcount(session);
return 0;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 637a341c1e2d..8dec6876dc50 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -346,19 +346,19 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
skb_put(skb, 2);
/* Copy user data into skb */
- error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
+ error = memcpy_fromiovec(skb_put(skb, total_len), m->msg_iov,
+ total_len);
if (error < 0) {
kfree_skb(skb);
goto error_put_sess_tun;
}
- skb_put(skb, total_len);
l2tp_xmit_skb(session, skb, session->hdr_len);
sock_put(ps->tunnel_sock);
sock_put(sk);
- return error;
+ return total_len;
error_put_sess_tun:
sock_put(ps->tunnel_sock);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a6893602f87a..4fdb306e42e0 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -175,7 +175,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
* add it to the device after the station.
*/
if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) {
- ieee80211_key_free(sdata->local, key);
+ ieee80211_key_free_unused(key);
err = -ENOENT;
goto out_unlock;
}
@@ -214,8 +214,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
}
err = ieee80211_key_link(key, sdata, sta);
- if (err)
- ieee80211_key_free(sdata->local, key);
out_unlock:
mutex_unlock(&sdata->local->sta_mtx);
@@ -254,7 +252,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
goto out_unlock;
}
- __ieee80211_key_free(key);
+ ieee80211_key_free(key, true);
ret = 0;
out_unlock:
@@ -445,12 +443,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct timespec uptime;
+ u64 packets = 0;
+ int ac;
sinfo->generation = sdata->local->sta_generation;
sinfo->filled = STATION_INFO_INACTIVE_TIME |
- STATION_INFO_RX_BYTES |
- STATION_INFO_TX_BYTES |
+ STATION_INFO_RX_BYTES64 |
+ STATION_INFO_TX_BYTES64 |
STATION_INFO_RX_PACKETS |
STATION_INFO_TX_PACKETS |
STATION_INFO_TX_RETRIES |
@@ -467,10 +467,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->connected_time = uptime.tv_sec - sta->last_connected;
sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
+ sinfo->tx_bytes = 0;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ sinfo->tx_bytes += sta->tx_bytes[ac];
+ packets += sta->tx_packets[ac];
+ }
+ sinfo->tx_packets = packets;
sinfo->rx_bytes = sta->rx_bytes;
- sinfo->tx_bytes = sta->tx_bytes;
sinfo->rx_packets = sta->rx_packets;
- sinfo->tx_packets = sta->tx_packets;
sinfo->tx_retries = sta->tx_retry_count;
sinfo->tx_failed = sta->tx_retry_failed;
sinfo->rx_dropped_misc = sta->rx_dropped;
@@ -598,8 +602,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
data[i++] += sta->rx_fragments; \
data[i++] += sta->rx_dropped; \
\
- data[i++] += sta->tx_packets; \
- data[i++] += sta->tx_bytes; \
+ data[i++] += sinfo.tx_packets; \
+ data[i++] += sinfo.tx_bytes; \
data[i++] += sta->tx_fragments; \
data[i++] += sta->tx_filtered_count; \
data[i++] += sta->tx_retry_failed; \
@@ -621,13 +625,14 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
if (!(sta && !WARN_ON(sta->sdata->dev != dev)))
goto do_survey;
+ sinfo.filled = 0;
+ sta_set_sinfo(sta, &sinfo);
+
i = 0;
ADD_STA_STATS(sta);
data[i++] = sta->sta_state;
- sinfo.filled = 0;
- sta_set_sinfo(sta, &sinfo);
if (sinfo.filled & STATION_INFO_TX_BITRATE)
data[i] = 100000 *
@@ -800,8 +805,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
IEEE80211_CHANCTX_EXCLUSIVE);
}
} else if (local->open_count == local->monitors) {
- local->_oper_channel = chandef->chan;
- local->_oper_channel_type = cfg80211_get_chandef_type(chandef);
+ local->_oper_chandef = *chandef;
ieee80211_hw_config(local, 0);
}
@@ -960,8 +964,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->vif.bss_conf.hidden_ssid =
(params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE);
- sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow;
- sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps;
+ memset(&sdata->vif.bss_conf.p2p_noa_attr, 0,
+ sizeof(sdata->vif.bss_conf.p2p_noa_attr));
+ sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow =
+ params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
+ if (params->p2p_opp_ps)
+ sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
+ IEEE80211_P2P_OPPPS_ENABLE_BIT;
err = ieee80211_assign_beacon(sdata, &params->beacon);
if (err < 0)
@@ -1034,15 +1043,26 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
sta_info_flush_defer(vlan);
sta_info_flush_defer(sdata);
+ synchronize_net();
rcu_barrier();
- list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
sta_info_flush_cleanup(vlan);
+ ieee80211_free_keys(vlan);
+ }
sta_info_flush_cleanup(sdata);
+ ieee80211_free_keys(sdata);
sdata->vif.bss_conf.enable_beacon = false;
+ sdata->vif.bss_conf.ssid_len = 0;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+ if (sdata->wdev.cac_started) {
+ cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
+ cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED,
+ GFP_KERNEL);
+ }
+
drv_stop_ap(sdata->local, sdata);
/* free all potentially still buffered bcast frames */
@@ -1177,6 +1197,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
mask |= BIT(NL80211_STA_FLAG_ASSOCIATED);
if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED))
set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
+ } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ /*
+ * TDLS -- everything follows authorized, but
+ * only becoming authorized is possible, not
+ * going back
+ */
+ if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
+ set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_ASSOCIATED);
+ mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_ASSOCIATED);
+ }
}
ret = sta_apply_auth_flags(local, sta, mask, set);
@@ -1261,7 +1293,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (ieee80211_vif_is_mesh(&sdata->vif)) {
#ifdef CONFIG_MAC80211_MESH
u32 changed = 0;
- if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) {
+
+ if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {
switch (params->plink_state) {
case NL80211_PLINK_ESTAB:
if (sta->plink_state != NL80211_PLINK_ESTAB)
@@ -1292,15 +1325,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
/* nothing */
break;
}
- } else {
- switch (params->plink_action) {
- case PLINK_ACTION_OPEN:
- changed |= mesh_plink_open(sta);
- break;
- case PLINK_ACTION_BLOCK:
- changed |= mesh_plink_block(sta);
- break;
- }
+ }
+
+ switch (params->plink_action) {
+ case NL80211_PLINK_ACTION_NO_ACTION:
+ /* nothing */
+ break;
+ case NL80211_PLINK_ACTION_OPEN:
+ changed |= mesh_plink_open(sta);
+ break;
+ case NL80211_PLINK_ACTION_BLOCK:
+ changed |= mesh_plink_block(sta);
+ break;
}
if (params->local_pm)
@@ -1346,8 +1382,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
* defaults -- if userspace wants something else we'll
* change it accordingly in sta_apply_parameters()
*/
- sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
- sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+ if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
+ sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
+ sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+ }
err = sta_apply_parameters(local, sta, params);
if (err) {
@@ -1356,8 +1394,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
}
/*
- * for TDLS, rate control should be initialized only when supported
- * rates are known.
+ * for TDLS, rate control should be initialized only when
+ * rates are known and station is marked authorized
*/
if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER))
rate_control_rate_init(sta);
@@ -1394,50 +1432,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_change_station(struct wiphy *wiphy,
- struct net_device *dev,
- u8 *mac,
+ struct net_device *dev, u8 *mac,
struct station_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = wiphy_priv(wiphy);
struct sta_info *sta;
struct ieee80211_sub_if_data *vlansdata;
+ enum cfg80211_station_type statype;
int err;
mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mac);
if (!sta) {
- mutex_unlock(&local->sta_mtx);
- return -ENOENT;
+ err = -ENOENT;
+ goto out_err;
}
- /* in station mode, some updates are only valid with TDLS */
- if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- (params->supported_rates || params->ht_capa || params->vht_capa ||
- params->sta_modify_mask ||
- (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) &&
- !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
- mutex_unlock(&local->sta_mtx);
- return -EINVAL;
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_MESH_POINT:
+ if (sdata->u.mesh.user_mpm)
+ statype = CFG80211_STA_MESH_PEER_USER;
+ else
+ statype = CFG80211_STA_MESH_PEER_KERNEL;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ statype = CFG80211_STA_IBSS;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ statype = CFG80211_STA_AP_STA;
+ break;
+ }
+ if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ statype = CFG80211_STA_TDLS_PEER_ACTIVE;
+ else
+ statype = CFG80211_STA_TDLS_PEER_SETUP;
+ break;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
+ statype = CFG80211_STA_AP_CLIENT;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto out_err;
}
+ err = cfg80211_check_station_change(wiphy, params, statype);
+ if (err)
+ goto out_err;
+
if (params->vlan && params->vlan != sta->sdata->dev) {
bool prev_4addr = false;
bool new_4addr = false;
vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
- if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
- vlansdata->vif.type != NL80211_IFTYPE_AP) {
- mutex_unlock(&local->sta_mtx);
- return -EINVAL;
- }
-
if (params->vlan->ieee80211_ptr->use_4addr) {
if (vlansdata->u.vlan.sta) {
- mutex_unlock(&local->sta_mtx);
- return -EBUSY;
+ err = -EBUSY;
+ goto out_err;
}
rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
@@ -1464,12 +1519,12 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
err = sta_apply_parameters(local, sta, params);
- if (err) {
- mutex_unlock(&local->sta_mtx);
- return err;
- }
+ if (err)
+ goto out_err;
- if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates)
+ /* When peer becomes authorized, init rate control as well */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ test_sta_flag(sta, WLAN_STA_AUTHORIZED))
rate_control_rate_init(sta);
mutex_unlock(&local->sta_mtx);
@@ -1479,7 +1534,11 @@ static int ieee80211_change_station(struct wiphy *wiphy,
ieee80211_recalc_ps(local, -1);
ieee80211_recalc_ps_vif(sdata);
}
+
return 0;
+out_err:
+ mutex_unlock(&local->sta_mtx);
+ return err;
}
#ifdef CONFIG_MAC80211_MESH
@@ -1489,7 +1548,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_sub_if_data *sdata;
struct mesh_path *mpath;
struct sta_info *sta;
- int err;
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1500,17 +1558,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
return -ENOENT;
}
- err = mesh_path_add(sdata, dst);
- if (err) {
+ mpath = mesh_path_add(sdata, dst);
+ if (IS_ERR(mpath)) {
rcu_read_unlock();
- return err;
+ return PTR_ERR(mpath);
}
- mpath = mesh_path_lookup(sdata, dst);
- if (!mpath) {
- rcu_read_unlock();
- return -ENXIO;
- }
mesh_path_fix_nexthop(mpath, sta);
rcu_read_unlock();
@@ -1687,6 +1740,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
ifmsh->mesh_sp_id = setup->sync_method;
ifmsh->mesh_pp_id = setup->path_sel_proto;
ifmsh->mesh_pm_id = setup->path_metric;
+ ifmsh->user_mpm = setup->user_mpm;
ifmsh->security = IEEE80211_MESH_SEC_NONE;
if (setup->is_authenticated)
ifmsh->security |= IEEE80211_MESH_SEC_AUTHED;
@@ -1730,8 +1784,11 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
conf->dot11MeshTTL = nconf->dot11MeshTTL;
if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask))
conf->element_ttl = nconf->element_ttl;
- if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask))
+ if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) {
+ if (ifmsh->user_mpm)
+ return -EBUSY;
conf->auto_open_plinks = nconf->auto_open_plinks;
+ }
if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask))
conf->dot11MeshNbrOffsetMaxNeighbor =
nconf->dot11MeshNbrOffsetMaxNeighbor;
@@ -1910,12 +1967,20 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
}
if (params->p2p_ctwindow >= 0) {
- sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow;
+ sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &=
+ ~IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
+ sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
+ params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
changed |= BSS_CHANGED_P2P_PS;
}
- if (params->p2p_opp_ps >= 0) {
- sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps;
+ if (params->p2p_opp_ps > 0) {
+ sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
+ IEEE80211_P2P_OPPPS_ENABLE_BIT;
+ changed |= BSS_CHANGED_P2P_PS;
+ } else if (params->p2p_opp_ps == 0) {
+ sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &=
+ ~IEEE80211_P2P_OPPPS_ENABLE_BIT;
changed |= BSS_CHANGED_P2P_PS;
}
@@ -2359,9 +2424,22 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
}
for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+ struct ieee80211_supported_band *sband = wiphy->bands[i];
+ int j;
+
sdata->rc_rateidx_mask[i] = mask->control[i].legacy;
memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs,
sizeof(mask->control[i].mcs));
+
+ sdata->rc_has_mcs_mask[i] = false;
+ if (!sband)
+ continue;
+
+ for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++)
+ if (~sdata->rc_rateidx_mcs_mask[i][j]) {
+ sdata->rc_has_mcs_mask[i] = true;
+ break;
+ }
}
return 0;
@@ -2371,7 +2449,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel,
unsigned int duration, u64 *cookie,
- struct sk_buff *txskb)
+ struct sk_buff *txskb,
+ enum ieee80211_roc_type type)
{
struct ieee80211_roc_work *roc, *tmp;
bool queued = false;
@@ -2390,6 +2469,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
roc->duration = duration;
roc->req_duration = duration;
roc->frame = txskb;
+ roc->type = type;
roc->mgmt_tx_cookie = (unsigned long)txskb;
roc->sdata = sdata;
INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
@@ -2420,7 +2500,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
if (!duration)
duration = 10;
- ret = drv_remain_on_channel(local, sdata, channel, duration);
+ ret = drv_remain_on_channel(local, sdata, channel, duration, type);
if (ret) {
kfree(roc);
return ret;
@@ -2439,10 +2519,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
*
* If it hasn't started yet, just increase the duration
* and add the new one to the list of dependents.
+ * If the type of the new ROC has higher priority, modify the
+ * type of the previous one to match that of the new one.
*/
if (!tmp->started) {
list_add_tail(&roc->list, &tmp->dependents);
tmp->duration = max(tmp->duration, roc->duration);
+ tmp->type = max(tmp->type, roc->type);
queued = true;
break;
}
@@ -2454,16 +2537,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
/*
* In the offloaded ROC case, if it hasn't begun, add
* this new one to the dependent list to be handled
- * when the the master one begins. If it has begun,
+ * when the master one begins. If it has begun,
* check that there's still a minimum time left and
* if so, start this one, transmitting the frame, but
- * add it to the list directly after this one with a
+ * add it to the list directly after this one with
* a reduced time so we'll ask the driver to execute
* it right after finishing the previous one, in the
* hope that it'll also be executed right afterwards,
* effectively extending the old one.
* If there's no minimum time left, just add it to the
* normal list.
+ * TODO: the ROC type is ignored here, assuming that it
+ * is better to immediately use the current ROC.
*/
if (!tmp->hw_begun) {
list_add_tail(&roc->list, &tmp->dependents);
@@ -2557,7 +2642,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy,
mutex_lock(&local->mtx);
ret = ieee80211_start_roc_work(local, sdata, chan,
- duration, cookie, NULL);
+ duration, cookie, NULL,
+ IEEE80211_ROC_TYPE_NORMAL);
mutex_unlock(&local->mtx);
return ret;
@@ -2792,7 +2878,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
/* This will handle all kinds of coalescing and immediate TX */
ret = ieee80211_start_roc_work(local, sdata, chan,
- wait, cookie, skb);
+ wait, cookie, skb,
+ IEEE80211_ROC_TYPE_MGMT_TX);
if (ret)
kfree_skb(skb);
out_unlock:
@@ -3302,9 +3389,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
if (local->use_chanctx)
*chandef = local->monitor_chandef;
else
- cfg80211_chandef_create(chandef,
- local->_oper_channel,
- local->_oper_channel_type);
+ *chandef = local->_oper_chandef;
ret = 0;
}
rcu_read_unlock();
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 931be419ab5a..03e8d2e3270e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -22,7 +22,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH);
if (!local->use_chanctx) {
- local->_oper_channel_type = cfg80211_get_chandef_type(chandef);
+ local->_oper_chandef = *chandef;
ieee80211_hw_config(local, 0);
}
}
@@ -57,6 +57,22 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
return NULL;
}
+static bool ieee80211_is_radar_required(struct ieee80211_local *local)
+{
+ struct ieee80211_sub_if_data *sdata;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (sdata->radar_required) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+ rcu_read_unlock();
+
+ return false;
+}
+
static struct ieee80211_chanctx *
ieee80211_new_chanctx(struct ieee80211_local *local,
const struct cfg80211_chan_def *chandef,
@@ -76,6 +92,9 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
ctx->conf.rx_chains_static = 1;
ctx->conf.rx_chains_dynamic = 1;
ctx->mode = mode;
+ ctx->conf.radar_enabled = ieee80211_is_radar_required(local);
+ if (!local->use_chanctx)
+ local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
/* acquire mutex to prevent idle from changing */
mutex_lock(&local->mtx);
@@ -85,9 +104,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
ieee80211_hw_config(local, changed);
if (!local->use_chanctx) {
- local->_oper_channel_type =
- cfg80211_get_chandef_type(chandef);
- local->_oper_channel = chandef->chan;
+ local->_oper_chandef = *chandef;
ieee80211_hw_config(local, 0);
} else {
err = drv_add_chanctx(local, ctx);
@@ -112,12 +129,24 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
static void ieee80211_free_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
+ bool check_single_channel = false;
lockdep_assert_held(&local->chanctx_mtx);
WARN_ON_ONCE(ctx->refcount != 0);
if (!local->use_chanctx) {
- local->_oper_channel_type = NL80211_CHAN_NO_HT;
+ struct cfg80211_chan_def *chandef = &local->_oper_chandef;
+ chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
+ chandef->center_freq1 = chandef->chan->center_freq;
+ chandef->center_freq2 = 0;
+
+ /* NOTE: Disabling radar is only valid here for
+ * single channel context. To be sure, check it ...
+ */
+ if (local->hw.conf.radar_enabled)
+ check_single_channel = true;
+ local->hw.conf.radar_enabled = false;
+
ieee80211_hw_config(local, 0);
} else {
drv_remove_chanctx(local, ctx);
@@ -126,6 +155,9 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local,
list_del_rcu(&ctx->list);
kfree_rcu(ctx, rcu_head);
+ /* throw a warning if this wasn't the only channel context. */
+ WARN_ON(check_single_channel && !list_empty(&local->chanctx_list));
+
mutex_lock(&local->mtx);
ieee80211_recalc_idle(local);
mutex_unlock(&local->mtx);
@@ -237,19 +269,11 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *chanctx)
{
- struct ieee80211_sub_if_data *sdata;
- bool radar_enabled = false;
+ bool radar_enabled;
lockdep_assert_held(&local->chanctx_mtx);
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->radar_required) {
- radar_enabled = true;
- break;
- }
- }
- rcu_read_unlock();
+ radar_enabled = ieee80211_is_radar_required(local);
if (radar_enabled == chanctx->conf.radar_enabled)
return;
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index c3a3082b72e5..1521cabad3d6 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
char buf[50];
struct ieee80211_key *key;
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
lockdep_assert_held(&sdata->local->key_mtx);
@@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_unicast_key =
debugfs_create_symlink("default_unicast_key",
- sdata->debugfs.dir, buf);
+ sdata->vif.debugfs_dir, buf);
}
if (sdata->debugfs.default_multicast_key) {
@@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_multicast_key =
debugfs_create_symlink("default_multicast_key",
- sdata->debugfs.dir, buf);
+ sdata->vif.debugfs_dir, buf);
}
}
@@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
char buf[50];
struct ieee80211_key *key;
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
key = key_mtx_dereference(sdata->local,
@@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_mgmt_key =
debugfs_create_symlink("default_mgmt_key",
- sdata->debugfs.dir, buf);
+ sdata->vif.debugfs_dir, buf);
} else
ieee80211_debugfs_key_remove_mgmt_default(sdata);
}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 059bbb82e84f..14abcf44f974 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -124,6 +124,15 @@ static ssize_t ieee80211_if_fmt_##name( \
return scnprintf(buf, buflen, "%d\n", sdata->field / 16); \
}
+#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \
+static ssize_t ieee80211_if_fmt_##name( \
+ const struct ieee80211_sub_if_data *sdata, \
+ char *buf, int buflen) \
+{ \
+ return scnprintf(buf, buflen, "%d\n", \
+ jiffies_to_msecs(sdata->field)); \
+}
+
#define __IEEE80211_IF_FILE(name, _write) \
static ssize_t ieee80211_if_read_##name(struct file *file, \
char __user *userbuf, \
@@ -197,6 +206,7 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC);
IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16);
+IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
enum ieee80211_smps_mode smps_mode)
@@ -521,7 +531,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
#endif
#define DEBUGFS_ADD_MODE(name, mode) \
- debugfs_create_file(#name, mode, sdata->debugfs.dir, \
+ debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \
sdata, &name##_ops);
#define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400)
@@ -542,6 +552,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
DEBUGFS_ADD(aid);
DEBUGFS_ADD(last_beacon);
DEBUGFS_ADD(ave_beacon);
+ DEBUGFS_ADD(beacon_timeout);
DEBUGFS_ADD_MODE(smps, 0600);
DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
DEBUGFS_ADD_MODE(uapsd_queues, 0600);
@@ -577,7 +588,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata)
static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
{
struct dentry *dir = debugfs_create_dir("mesh_stats",
- sdata->debugfs.dir);
+ sdata->vif.debugfs_dir);
#define MESHSTATS_ADD(name)\
debugfs_create_file(#name, 0400, dir, sdata, &name##_ops);
@@ -594,7 +605,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
{
struct dentry *dir = debugfs_create_dir("mesh_config",
- sdata->debugfs.dir);
+ sdata->vif.debugfs_dir);
#define MESHPARAMS_ADD(name) \
debugfs_create_file(#name, 0600, dir, sdata, &name##_ops);
@@ -631,7 +642,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
static void add_files(struct ieee80211_sub_if_data *sdata)
{
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
DEBUGFS_ADD(flags);
@@ -673,21 +684,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
char buf[10+IFNAMSIZ];
sprintf(buf, "netdev:%s", sdata->name);
- sdata->debugfs.dir = debugfs_create_dir(buf,
+ sdata->vif.debugfs_dir = debugfs_create_dir(buf,
sdata->local->hw.wiphy->debugfsdir);
- if (sdata->debugfs.dir)
+ if (sdata->vif.debugfs_dir)
sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
- sdata->debugfs.dir);
+ sdata->vif.debugfs_dir);
add_files(sdata);
}
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
{
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
- debugfs_remove_recursive(sdata->debugfs.dir);
- sdata->debugfs.dir = NULL;
+ debugfs_remove_recursive(sdata->vif.debugfs_dir);
+ sdata->vif.debugfs_dir = NULL;
}
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
@@ -695,7 +706,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
struct dentry *dir;
char buf[10 + IFNAMSIZ];
- dir = sdata->debugfs.dir;
+ dir = sdata->vif.debugfs_dir;
if (!dir)
return;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c7591f73dbc3..44e201d60a13 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -54,6 +54,7 @@ STA_FILE(aid, sta.aid, D);
STA_FILE(dev, sdata->name, S);
STA_FILE(last_signal, last_signal, D);
STA_FILE(last_ack_signal, last_ack_signal, D);
+STA_FILE(beacon_loss_count, beacon_loss_count, D);
static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
@@ -325,6 +326,36 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
}
STA_OPS(ht_capa);
+static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ char buf[128], *p = buf;
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap;
+
+ p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n",
+ vhtc->vht_supported ? "" : "not ");
+ if (vhtc->vht_supported) {
+ p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.8x\n", vhtc->cap);
+
+ p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n",
+ le16_to_cpu(vhtc->vht_mcs.rx_mcs_map));
+ if (vhtc->vht_mcs.rx_highest)
+ p += scnprintf(p, sizeof(buf)+buf-p,
+ "MCS RX highest: %d Mbps\n",
+ le16_to_cpu(vhtc->vht_mcs.rx_highest));
+ p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n",
+ le16_to_cpu(vhtc->vht_mcs.tx_mcs_map));
+ if (vhtc->vht_mcs.tx_highest)
+ p += scnprintf(p, sizeof(buf)+buf-p,
+ "MCS TX highest: %d Mbps\n",
+ le16_to_cpu(vhtc->vht_mcs.tx_highest));
+ }
+
+ return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+}
+STA_OPS(vht_capa);
+
static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -404,7 +435,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD(agg_status);
DEBUGFS_ADD(dev);
DEBUGFS_ADD(last_signal);
+ DEBUGFS_ADD(beacon_loss_count);
DEBUGFS_ADD(ht_capa);
+ DEBUGFS_ADD(vht_capa);
DEBUGFS_ADD(last_ack_signal);
DEBUGFS_ADD(current_tx_rate);
DEBUGFS_ADD(last_rx_rate);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index ee56d0779d8b..169664c122e2 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
return ret;
}
+static inline void drv_set_multicast_list(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct netdev_hw_addr_list *mc_list)
+{
+ bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+
+ trace_drv_set_multicast_list(local, sdata, mc_list->count);
+
+ check_sdata_in_driver(sdata);
+
+ if (local->ops->set_multicast_list)
+ local->ops->set_multicast_list(&local->hw, &sdata->vif,
+ allmulti, mc_list);
+ trace_drv_return_void(local);
+}
+
static inline void drv_configure_filter(struct ieee80211_local *local,
unsigned int changed_flags,
unsigned int *total_flags,
@@ -531,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local,
local->ops->sta_remove_debugfs(&local->hw, &sdata->vif,
sta, dir);
}
-
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- might_sleep();
-
- check_sdata_in_driver(sdata);
-
- if (!local->ops->add_interface_debugfs)
- return;
-
- local->ops->add_interface_debugfs(&local->hw, &sdata->vif,
- sdata->debugfs.dir);
-}
-
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- might_sleep();
-
- check_sdata_in_driver(sdata);
-
- if (!local->ops->remove_interface_debugfs)
- return;
-
- local->ops->remove_interface_debugfs(&local->hw, &sdata->vif,
- sdata->debugfs.dir);
-}
-#else
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata) {}
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata) {}
#endif
static inline __must_check
@@ -741,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
local->ops->rfkill_poll(&local->hw);
}
-static inline void drv_flush(struct ieee80211_local *local, bool drop)
+static inline void drv_flush(struct ieee80211_local *local,
+ u32 queues, bool drop)
{
might_sleep();
- trace_drv_flush(local, drop);
+ trace_drv_flush(local, queues, drop);
if (local->ops->flush)
- local->ops->flush(&local->hw, drop);
+ local->ops->flush(&local->hw, queues, drop);
trace_drv_return_void(local);
}
@@ -787,15 +767,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local,
static inline int drv_remain_on_channel(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *chan,
- unsigned int duration)
+ unsigned int duration,
+ enum ieee80211_roc_type type)
{
int ret;
might_sleep();
- trace_drv_remain_on_channel(local, sdata, chan, duration);
+ trace_drv_remain_on_channel(local, sdata, chan, duration, type);
ret = local->ops->remain_on_channel(&local->hw, &sdata->vif,
- chan, duration);
+ chan, duration, type);
trace_drv_return_int(local, ret);
return ret;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 0db25d4bb223..af8cee06e4f3 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -40,13 +40,6 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
if (!ht_cap->ht_supported)
return;
- if (sdata->vif.type != NL80211_IFTYPE_STATION) {
- /* AP interfaces call this code when adding new stations,
- * so just silently ignore non station interfaces.
- */
- return;
- }
-
/* NOTE: If you add more over-rides here, update register_hw
* ht_capa_mod_msk logic in main.c as well.
* And, if this method can ever change ht_cap.ht_supported, fix
@@ -97,7 +90,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_ht_cap *ht_cap_ie,
struct sta_info *sta)
{
- struct ieee80211_sta_ht_cap ht_cap;
+ struct ieee80211_sta_ht_cap ht_cap, own_cap;
u8 ampdu_info, tx_mcs_set_cap;
int i, max_tx_streams;
bool changed;
@@ -111,6 +104,18 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
ht_cap.ht_supported = true;
+ own_cap = sband->ht_cap;
+
+ /*
+ * If user has specified capability over-rides, take care
+ * of that if the station we're setting up is the AP that
+ * we advertised a restricted capability set to. Override
+ * our own capabilities and then use those below.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ ieee80211_apply_htcap_overrides(sdata, &own_cap);
+
/*
* The bits listed in this expression should be
* the same for the peer and us, if the station
@@ -118,21 +123,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
* we mask them out.
*/
ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) &
- (sband->ht_cap.cap |
- ~(IEEE80211_HT_CAP_LDPC_CODING |
- IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
- IEEE80211_HT_CAP_GRN_FLD |
- IEEE80211_HT_CAP_SGI_20 |
- IEEE80211_HT_CAP_SGI_40 |
- IEEE80211_HT_CAP_DSSSCCK40));
+ (own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING |
+ IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+ IEEE80211_HT_CAP_GRN_FLD |
+ IEEE80211_HT_CAP_SGI_20 |
+ IEEE80211_HT_CAP_SGI_40 |
+ IEEE80211_HT_CAP_DSSSCCK40));
/*
* The STBC bits are asymmetric -- if we don't have
* TX then mask out the peer's RX and vice versa.
*/
- if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC))
+ if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC))
ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC;
- if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC))
+ if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC))
ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC;
ampdu_info = ht_cap_ie->ampdu_params_info;
@@ -142,7 +146,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
(ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2;
/* own MCS TX capabilities */
- tx_mcs_set_cap = sband->ht_cap.mcs.tx_params;
+ tx_mcs_set_cap = own_cap.mcs.tx_params;
/* Copy peer MCS TX capabilities, the driver might need them. */
ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params;
@@ -168,26 +172,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
*/
for (i = 0; i < max_tx_streams; i++)
ht_cap.mcs.rx_mask[i] =
- sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i];
+ own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i];
if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION)
for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE;
i < IEEE80211_HT_MCS_MASK_LEN; i++)
ht_cap.mcs.rx_mask[i] =
- sband->ht_cap.mcs.rx_mask[i] &
+ own_cap.mcs.rx_mask[i] &
ht_cap_ie->mcs.rx_mask[i];
/* handle MCS rate 32 too */
- if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)
+ if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)
ht_cap.mcs.rx_mask[32/8] |= 1;
apply:
- /*
- * If user has specified capability over-rides, take care
- * of that here.
- */
- ieee80211_apply_htcap_overrides(sdata, &ht_cap);
-
changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 40b71dfcc79d..170f9a7fa319 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -44,7 +44,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
int rates, i;
- struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
u8 *pos;
struct ieee80211_supported_band *sband;
@@ -52,20 +51,14 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
u32 bss_change;
u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
struct cfg80211_chan_def chandef;
+ struct beacon_data *presp;
+ int frame_len;
lockdep_assert_held(&ifibss->mtx);
/* Reset own TSF to allow time synchronization work. */
drv_reset_tsf(local, sdata);
- skb = ifibss->skb;
- RCU_INIT_POINTER(ifibss->presp, NULL);
- synchronize_rcu();
- skb->data = skb->head;
- skb->len = 0;
- skb_reset_tail_pointer(skb);
- skb_reserve(skb, sdata->local->hw.extra_tx_headroom);
-
if (!ether_addr_equal(ifibss->bssid, bssid))
sta_info_flush(sdata);
@@ -73,10 +66,19 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.bss_conf.ibss_joined) {
sdata->vif.bss_conf.ibss_joined = false;
sdata->vif.bss_conf.ibss_creator = false;
+ sdata->vif.bss_conf.enable_beacon = false;
netif_carrier_off(sdata->dev);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS);
+ ieee80211_bss_info_change_notify(sdata,
+ BSS_CHANGED_IBSS |
+ BSS_CHANGED_BEACON_ENABLED);
}
+ presp = rcu_dereference_protected(ifibss->presp,
+ lockdep_is_held(&ifibss->mtx));
+ rcu_assign_pointer(ifibss->presp, NULL);
+ if (presp)
+ kfree_rcu(presp, rcu_head);
+
sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
cfg80211_chandef_create(&chandef, chan, ifibss->channel_type);
@@ -98,19 +100,24 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[chan->band];
- /* build supported rates array */
- pos = supp_rates;
- for (i = 0; i < sband->n_bitrates; i++) {
- int rate = sband->bitrates[i].bitrate;
- u8 basic = 0;
- if (basic_rates & BIT(i))
- basic = 0x80;
- *pos++ = basic | (u8) (rate / 5);
- }
-
/* Build IBSS probe response */
- mgmt = (void *) skb_put(skb, 24 + sizeof(mgmt->u.beacon));
- memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
+ frame_len = sizeof(struct ieee80211_hdr_3addr) +
+ 12 /* struct ieee80211_mgmt.u.beacon */ +
+ 2 + IEEE80211_MAX_SSID_LEN /* max SSID */ +
+ 2 + 8 /* max Supported Rates */ +
+ 3 /* max DS params */ +
+ 4 /* IBSS params */ +
+ 2 + (IEEE80211_MAX_SUPP_RATES - 8) +
+ 2 + sizeof(struct ieee80211_ht_cap) +
+ 2 + sizeof(struct ieee80211_ht_operation) +
+ ifibss->ie_len;
+ presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
+ if (!presp)
+ return;
+
+ presp->head = (void *)(presp + 1);
+
+ mgmt = (void *) presp->head;
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_PROBE_RESP);
eth_broadcast_addr(mgmt->da);
@@ -120,27 +127,30 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
mgmt->u.beacon.timestamp = cpu_to_le64(tsf);
mgmt->u.beacon.capab_info = cpu_to_le16(capability);
- pos = skb_put(skb, 2 + ifibss->ssid_len);
+ pos = (u8 *)mgmt + offsetof(struct ieee80211_mgmt, u.beacon.variable);
+
*pos++ = WLAN_EID_SSID;
*pos++ = ifibss->ssid_len;
memcpy(pos, ifibss->ssid, ifibss->ssid_len);
+ pos += ifibss->ssid_len;
- rates = sband->n_bitrates;
- if (rates > 8)
- rates = 8;
- pos = skb_put(skb, 2 + rates);
+ rates = min_t(int, 8, sband->n_bitrates);
*pos++ = WLAN_EID_SUPP_RATES;
*pos++ = rates;
- memcpy(pos, supp_rates, rates);
+ for (i = 0; i < rates; i++) {
+ int rate = sband->bitrates[i].bitrate;
+ u8 basic = 0;
+ if (basic_rates & BIT(i))
+ basic = 0x80;
+ *pos++ = basic | (u8) (rate / 5);
+ }
if (sband->band == IEEE80211_BAND_2GHZ) {
- pos = skb_put(skb, 2 + 1);
*pos++ = WLAN_EID_DS_PARAMS;
*pos++ = 1;
*pos++ = ieee80211_frequency_to_channel(chan->center_freq);
}
- pos = skb_put(skb, 2 + 2);
*pos++ = WLAN_EID_IBSS_PARAMS;
*pos++ = 2;
/* FIX: set ATIM window based on scan results */
@@ -148,23 +158,25 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
*pos++ = 0;
if (sband->n_bitrates > 8) {
- rates = sband->n_bitrates - 8;
- pos = skb_put(skb, 2 + rates);
*pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = rates;
- memcpy(pos, &supp_rates[8], rates);
+ *pos++ = sband->n_bitrates - 8;
+ for (i = 8; i < sband->n_bitrates; i++) {
+ int rate = sband->bitrates[i].bitrate;
+ u8 basic = 0;
+ if (basic_rates & BIT(i))
+ basic = 0x80;
+ *pos++ = basic | (u8) (rate / 5);
+ }
}
- if (ifibss->ie_len)
- memcpy(skb_put(skb, ifibss->ie_len),
- ifibss->ie, ifibss->ie_len);
+ if (ifibss->ie_len) {
+ memcpy(pos, ifibss->ie, ifibss->ie_len);
+ pos += ifibss->ie_len;
+ }
/* add HT capability and information IEs */
if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
sband->ht_cap.ht_supported) {
- pos = skb_put(skb, 4 +
- sizeof(struct ieee80211_ht_cap) +
- sizeof(struct ieee80211_ht_operation));
pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,
sband->ht_cap.cap);
/*
@@ -177,7 +189,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
}
if (local->hw.queues >= IEEE80211_NUM_ACS) {
- pos = skb_put(skb, 9);
*pos++ = WLAN_EID_VENDOR_SPECIFIC;
*pos++ = 7; /* len */
*pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
@@ -189,11 +200,17 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
*pos++ = 0; /* U-APSD no in use */
}
- rcu_assign_pointer(ifibss->presp, skb);
+ presp->head_len = pos - presp->head;
+ if (WARN_ON(presp->head_len > frame_len))
+ return;
+
+ rcu_assign_pointer(ifibss->presp, presp);
sdata->vif.bss_conf.enable_beacon = true;
sdata->vif.bss_conf.beacon_int = beacon_int;
sdata->vif.bss_conf.basic_rates = basic_rates;
+ sdata->vif.bss_conf.ssid_len = ifibss->ssid_len;
+ memcpy(sdata->vif.bss_conf.ssid, ifibss->ssid, ifibss->ssid_len);
bss_change = BSS_CHANGED_BEACON_INT;
bss_change |= ieee80211_reset_erp_info(sdata);
bss_change |= BSS_CHANGED_BSSID;
@@ -202,6 +219,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
bss_change |= BSS_CHANGED_BASIC_RATES;
bss_change |= BSS_CHANGED_HT;
bss_change |= BSS_CHANGED_IBSS;
+ bss_change |= BSS_CHANGED_SSID;
/*
* In 5 GHz/802.11a, we can always use short slot time.
@@ -227,7 +245,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan,
- mgmt, skb->len, 0, GFP_KERNEL);
+ mgmt, presp->head_len, 0, GFP_KERNEL);
cfg80211_put_bss(local->hw.wiphy, bss);
netif_carrier_on(sdata->dev);
cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL);
@@ -448,7 +466,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
bool rates_updated = false;
- if (elems->ds_params && elems->ds_params_len == 1)
+ if (elems->ds_params)
freq = ieee80211_channel_to_frequency(elems->ds_params[0],
band);
else
@@ -822,8 +840,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
int tx_last_beacon, len = req->len;
struct sk_buff *skb;
- struct ieee80211_mgmt *resp;
- struct sk_buff *presp;
+ struct beacon_data *presp;
u8 *pos, *end;
lockdep_assert_held(&ifibss->mtx);
@@ -864,13 +881,15 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
}
/* Reply with ProbeResp */
- skb = skb_copy(presp, GFP_KERNEL);
+ skb = dev_alloc_skb(local->tx_headroom + presp->head_len);
if (!skb)
return;
- resp = (struct ieee80211_mgmt *) skb->data;
- memcpy(resp->da, mgmt->sa, ETH_ALEN);
- ibss_dbg(sdata, "Sending ProbeResp to %pM\n", resp->da);
+ skb_reserve(skb, local->tx_headroom);
+ memcpy(skb_put(skb, presp->head_len), presp->head, presp->head_len);
+
+ memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN);
+ ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
ieee80211_tx_skb(sdata, skb);
}
@@ -895,7 +914,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,
return;
ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
- &elems);
+ false, &elems);
ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
}
@@ -985,36 +1004,9 @@ static void ieee80211_ibss_timer(unsigned long data)
{
struct ieee80211_sub_if_data *sdata =
(struct ieee80211_sub_if_data *) data;
- struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
- struct ieee80211_local *local = sdata->local;
-
- if (local->quiescing) {
- ifibss->timer_running = true;
- return;
- }
-
- ieee80211_queue_work(&local->hw, &sdata->work);
-}
-
-#ifdef CONFIG_PM
-void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
- if (del_timer_sync(&ifibss->timer))
- ifibss->timer_running = true;
-}
-
-void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
-
- if (ifibss->timer_running) {
- add_timer(&ifibss->timer);
- ifibss->timer_running = false;
- }
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
-#endif
void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
{
@@ -1047,23 +1039,8 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
struct cfg80211_ibss_params *params)
{
- struct sk_buff *skb;
u32 changed = 0;
- skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom +
- sizeof(struct ieee80211_hdr_3addr) +
- 12 /* struct ieee80211_mgmt.u.beacon */ +
- 2 + IEEE80211_MAX_SSID_LEN /* max SSID */ +
- 2 + 8 /* max Supported Rates */ +
- 3 /* max DS params */ +
- 4 /* IBSS params */ +
- 2 + (IEEE80211_MAX_SUPP_RATES - 8) +
- 2 + sizeof(struct ieee80211_ht_cap) +
- 2 + sizeof(struct ieee80211_ht_operation) +
- params->ie_len);
- if (!skb)
- return -ENOMEM;
-
mutex_lock(&sdata->u.ibss.mtx);
if (params->bssid) {
@@ -1092,7 +1069,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
sdata->u.ibss.ie_len = params->ie_len;
}
- sdata->u.ibss.skb = skb;
sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH;
sdata->u.ibss.ibss_join_req = jiffies;
@@ -1128,13 +1104,13 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
{
- struct sk_buff *skb;
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
struct cfg80211_bss *cbss;
u16 capability;
int active_ibss;
struct sta_info *sta;
+ struct beacon_data *presp;
mutex_lock(&sdata->u.ibss.mtx);
@@ -1180,17 +1156,18 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
/* remove beacon */
kfree(sdata->u.ibss.ie);
- skb = rcu_dereference_protected(sdata->u.ibss.presp,
- lockdep_is_held(&sdata->u.ibss.mtx));
+ presp = rcu_dereference_protected(ifibss->presp,
+ lockdep_is_held(&sdata->u.ibss.mtx));
RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
sdata->vif.bss_conf.ibss_joined = false;
sdata->vif.bss_conf.ibss_creator = false;
sdata->vif.bss_conf.enable_beacon = false;
+ sdata->vif.bss_conf.ssid_len = 0;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_IBSS);
synchronize_rcu();
- kfree_skb(skb);
+ kfree(presp);
skb_queue_purge(&sdata->skb_queue);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5672533a0832..9ca8e3278cc0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -156,6 +156,7 @@ struct ieee80211_tx_data {
struct ieee80211_sub_if_data *sdata;
struct sta_info *sta;
struct ieee80211_key *key;
+ struct ieee80211_tx_rate rate;
unsigned int flags;
};
@@ -316,6 +317,7 @@ struct ieee80211_roc_work {
u32 duration, req_duration;
struct sk_buff *frame;
u64 cookie, mgmt_tx_cookie;
+ enum ieee80211_roc_type type;
};
/* flags used in struct ieee80211_if_managed.flags */
@@ -401,7 +403,6 @@ struct ieee80211_if_managed {
u16 aid;
- unsigned long timers_running; /* used for quiesce/restart */
bool powersave; /* powersave requested for this iface */
bool broken_ap; /* AP is broken -- turn off powersave */
u8 dtim_period;
@@ -443,7 +444,7 @@ struct ieee80211_if_managed {
u8 use_4addr;
- u8 p2p_noa_index;
+ s16 p2p_noa_index;
/* Signal strength from the last Beacon frame in the current BSS. */
int last_beacon_signal;
@@ -480,6 +481,8 @@ struct ieee80211_if_managed {
struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */
struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
+ struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */
+ struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */
};
struct ieee80211_if_ibss {
@@ -491,8 +494,6 @@ struct ieee80211_if_ibss {
u32 basic_rates;
- bool timer_running;
-
bool fixed_bssid;
bool fixed_channel;
bool privacy;
@@ -509,8 +510,7 @@ struct ieee80211_if_ibss {
unsigned long ibss_join_req;
/* probe response/beacon for IBSS */
- struct sk_buff __rcu *presp;
- struct sk_buff *skb;
+ struct beacon_data __rcu *presp;
spinlock_t incomplete_lock;
struct list_head incomplete_stations;
@@ -544,8 +544,6 @@ struct ieee80211_if_mesh {
struct timer_list mesh_path_timer;
struct timer_list mesh_path_root_timer;
- unsigned long timers_running;
-
unsigned long wrkq_flags;
u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
@@ -591,6 +589,7 @@ struct ieee80211_if_mesh {
IEEE80211_MESH_SEC_AUTHED = 0x1,
IEEE80211_MESH_SEC_SECURED = 0x2,
} security;
+ bool user_mpm;
/* Extensible Synchronization Framework */
const struct ieee80211_mesh_sync_ops *sync_ops;
s64 sync_offset_clockdrift_max;
@@ -683,6 +682,8 @@ struct ieee80211_sub_if_data {
/* count for keys needing tailroom space allocation */
int crypto_tx_tailroom_needed_cnt;
+ int crypto_tx_tailroom_pending_dec;
+ struct delayed_work dec_tailroom_needed_wk;
struct net_device *dev;
struct ieee80211_local *local;
@@ -740,6 +741,8 @@ struct ieee80211_sub_if_data {
/* bitmap of allowed (non-MCS) rate indexes for rate control */
u32 rc_rateidx_mask[IEEE80211_NUM_BANDS];
+
+ bool rc_has_mcs_mask[IEEE80211_NUM_BANDS];
u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN];
union {
@@ -758,7 +761,6 @@ struct ieee80211_sub_if_data {
#ifdef CONFIG_MAC80211_DEBUGFS
struct {
- struct dentry *dir;
struct dentry *subdir_stations;
struct dentry *default_unicast_key;
struct dentry *default_multicast_key;
@@ -766,10 +768,6 @@ struct ieee80211_sub_if_data {
} debugfs;
#endif
-#ifdef CONFIG_PM
- struct ieee80211_bss_conf suspend_bss_conf;
-#endif
-
/* must be last, dynamically sized area in this! */
struct ieee80211_vif vif;
};
@@ -804,11 +802,6 @@ enum sdata_queue_type {
enum {
IEEE80211_RX_MSG = 1,
IEEE80211_TX_STATUS_MSG = 2,
- IEEE80211_EOSP_MSG = 3,
-};
-
-struct skb_eosp_msg_data {
- u8 sta[ETH_ALEN], iface[ETH_ALEN];
};
enum queue_stop_reason {
@@ -819,6 +812,7 @@ enum queue_stop_reason {
IEEE80211_QUEUE_STOP_REASON_SUSPEND,
IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH,
};
#ifdef CONFIG_MAC80211_LEDS
@@ -1029,10 +1023,9 @@ struct ieee80211_local {
enum mac80211_scan_state next_scan_state;
struct delayed_work scan_work;
struct ieee80211_sub_if_data __rcu *scan_sdata;
- struct ieee80211_channel *csa_channel;
+ struct cfg80211_chan_def csa_chandef;
/* For backward compatibility only -- do not use */
- struct ieee80211_channel *_oper_channel;
- enum nl80211_channel_type _oper_channel_type;
+ struct cfg80211_chan_def _oper_chandef;
/* Temporary remain-on-channel for off-channel operations */
struct ieee80211_channel *tmp_channel;
@@ -1137,11 +1130,6 @@ struct ieee80211_local {
struct ieee80211_sub_if_data __rcu *p2p_sdata;
- /* dummy netdev for use w/ NAPI */
- struct net_device napi_dev;
-
- struct napi_struct napi;
-
/* virtual monitor interface */
struct ieee80211_sub_if_data __rcu *monitor_sdata;
struct cfg80211_chan_def monitor_chandef;
@@ -1173,11 +1161,8 @@ struct ieee802_11_elems {
/* pointers to IEs */
const u8 *ssid;
const u8 *supp_rates;
- const u8 *fh_params;
const u8 *ds_params;
- const u8 *cf_params;
const struct ieee80211_tim_ie *tim;
- const u8 *ibss_params;
const u8 *challenge;
const u8 *rsn;
const u8 *erp_info;
@@ -1197,23 +1182,20 @@ struct ieee802_11_elems {
const u8 *perr;
const struct ieee80211_rann_ie *rann;
const struct ieee80211_channel_sw_ie *ch_switch_ie;
+ const struct ieee80211_ext_chansw_ie *ext_chansw_ie;
+ const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
const u8 *country_elem;
const u8 *pwr_constr_elem;
- const u8 *quiet_elem; /* first quite element */
- const u8 *timeout_int;
+ const struct ieee80211_timeout_interval_ie *timeout_int;
const u8 *opmode_notif;
+ const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
/* length of them, respectively */
u8 ssid_len;
u8 supp_rates_len;
- u8 fh_params_len;
- u8 ds_params_len;
- u8 cf_params_len;
u8 tim_len;
- u8 ibss_params_len;
u8 challenge_len;
u8 rsn_len;
- u8 erp_info_len;
u8 ext_supp_rates_len;
u8 wmm_info_len;
u8 wmm_param_len;
@@ -1223,9 +1205,6 @@ struct ieee802_11_elems {
u8 prep_len;
u8 perr_len;
u8 country_elem_len;
- u8 quiet_elem_len;
- u8 num_of_quiet_elem; /* can be more the one */
- u8 timeout_int_len;
/* whether a parse error occurred while retrieving these elements */
bool parse_error;
@@ -1280,12 +1259,6 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
int ieee80211_max_network_latency(struct notifier_block *nb,
unsigned long data, void *dummy);
int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
-void
-ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
- const struct ieee80211_channel_sw_ie *sw_elem,
- struct ieee80211_bss *bss, u64 timestamp);
-void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata);
-void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
@@ -1294,6 +1267,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata);
void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata,
__le16 fc, bool acked);
+void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
/* IBSS code */
void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
@@ -1303,8 +1277,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
struct cfg80211_ibss_params *params);
int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata);
-void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata);
-void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata);
void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
@@ -1347,7 +1319,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local);
void ieee80211_offchannel_return(struct ieee80211_local *local);
void ieee80211_roc_setup(struct ieee80211_local *local);
void ieee80211_start_next_roc(struct ieee80211_local *local);
-void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata);
+void ieee80211_roc_purge(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);
void ieee80211_sw_roc_work(struct work_struct *work);
void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
@@ -1368,6 +1341,8 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
const int offset);
int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up);
void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata);
+int ieee80211_add_virtual_monitor(struct ieee80211_local *local);
+void ieee80211_del_virtual_monitor(struct ieee80211_local *local);
bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
@@ -1443,6 +1418,8 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta);
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
enum ieee80211_band band, bool nss_only);
+void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta_vht_cap *vht_cap);
/* Spectrum management */
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1520,11 +1497,16 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
ieee80211_tx_skb_tid(sdata, skb, 7);
}
-void ieee802_11_parse_elems(u8 *start, size_t len,
- struct ieee802_11_elems *elems);
-u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
+u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
struct ieee802_11_elems *elems,
u64 filter, u32 crc);
+static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
+ bool action,
+ struct ieee802_11_elems *elems)
+{
+ ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0);
+}
+
u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
enum ieee80211_band band);
@@ -1540,8 +1522,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr, bool ack);
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+ unsigned long queues,
enum queue_stop_reason reason);
void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
+ unsigned long queues,
enum queue_stop_reason reason);
void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
enum queue_stop_reason reason);
@@ -1558,6 +1542,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,
{
ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);
}
+void ieee80211_flush_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9ed49ad0380f..98d20c0f6fed 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1,5 +1,5 @@
/*
- * Interface handling (except master interface)
+ * Interface handling
*
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
@@ -92,7 +92,7 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local)
if (local->hw.conf.flags & IEEE80211_CONF_IDLE)
return 0;
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
local->hw.conf.flags |= IEEE80211_CONF_IDLE;
return IEEE80211_CONF_CHANGE_IDLE;
@@ -159,9 +159,10 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr)
+static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *iter;
u64 new, mask, tmp;
u8 *m;
int ret = 0;
@@ -181,11 +182,14 @@ static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr)
mutex_lock(&local->iflist_mtx);
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ list_for_each_entry(iter, &local->interfaces, list) {
+ if (iter == sdata)
+ continue;
+
+ if (iter->vif.type == NL80211_IFTYPE_MONITOR)
continue;
- m = sdata->vif.addr;
+ m = iter->vif.addr;
tmp = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
@@ -209,7 +213,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
if (ieee80211_sdata_running(sdata))
return -EBUSY;
- ret = ieee80211_verify_mac(sdata->local, sa->sa_data);
+ ret = ieee80211_verify_mac(sdata, sa->sa_data);
if (ret)
return ret;
@@ -357,7 +361,7 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)
sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
}
-static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
+int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
int ret;
@@ -410,7 +414,7 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
return 0;
}
-static void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
+void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
@@ -474,6 +478,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
master->control_port_protocol;
sdata->control_port_no_encrypt =
master->control_port_no_encrypt;
+ sdata->vif.cab_queue = master->vif.cab_queue;
+ memcpy(sdata->vif.hw_queue, master->vif.hw_queue,
+ sizeof(sdata->vif.hw_queue));
break;
}
case NL80211_IFTYPE_AP:
@@ -499,8 +506,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
res = drv_start(local);
if (res)
goto err_del_bss;
- if (local->ops->napi_poll)
- napi_enable(&local->napi);
/* we're brought up, everything changes */
hw_reconf_flags = ~0;
ieee80211_led_radio(local, true);
@@ -573,8 +578,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
goto err_del_interface;
}
- drv_add_interface_debugfs(local, sdata);
-
if (sdata->vif.type == NL80211_IFTYPE_AP) {
local->fif_pspoll++;
local->fif_probe_req++;
@@ -599,7 +602,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
case NL80211_IFTYPE_P2P_DEVICE:
break;
default:
- netif_carrier_on(dev);
+ /* not reached */
+ WARN_ON(1);
}
/*
@@ -656,8 +660,32 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
ieee80211_recalc_ps(local, -1);
- if (dev)
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+ /* XXX: for AP_VLAN, actually track AP queues */
netif_tx_start_all_queues(dev);
+ } else if (dev) {
+ unsigned long flags;
+ int n_acs = IEEE80211_NUM_ACS;
+ int ac;
+
+ if (local->hw.queues < IEEE80211_NUM_ACS)
+ n_acs = 1;
+
+ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+ if (sdata->vif.cab_queue == IEEE80211_INVAL_HW_QUEUE ||
+ (local->queue_stop_reasons[sdata->vif.cab_queue] == 0 &&
+ skb_queue_empty(&local->pending[sdata->vif.cab_queue]))) {
+ for (ac = 0; ac < n_acs; ac++) {
+ int ac_queue = sdata->vif.hw_queue[ac];
+
+ if (local->queue_stop_reasons[ac_queue] == 0 &&
+ skb_queue_empty(&local->pending[ac_queue]))
+ netif_start_subqueue(dev, ac);
+ }
+ }
+ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ }
return 0;
err_del_interface:
@@ -711,7 +739,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (sdata->dev)
netif_tx_stop_all_queues(sdata->dev);
- ieee80211_roc_purge(sdata);
+ ieee80211_roc_purge(local, sdata);
if (sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_mgd_stop(sdata);
@@ -736,12 +764,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
(sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1));
- /*
- * Don't count this interface for promisc/allmulti while it
- * is down. dev_mc_unsync() will invoke set_multicast_list
- * on the master interface which will sync these down to the
- * hardware as filter flags.
- */
+ /* don't count this interface for promisc/allmulti while it is down */
if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
atomic_dec(&local->iff_allmultis);
@@ -762,8 +785,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
sdata->dev->addr_len);
spin_unlock_bh(&local->filter_lock);
netif_addr_unlock_bh(sdata->dev);
-
- ieee80211_configure_filter(local);
}
del_timer_sync(&local->dynamic_ps_timer);
@@ -774,6 +795,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
+ WARN_ON(local->suspended);
mutex_lock(&local->iflist_mtx);
ieee80211_vif_release_channel(sdata);
mutex_unlock(&local->iflist_mtx);
@@ -824,14 +846,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (local->monitors == 0) {
local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR;
hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR;
- ieee80211_del_virtual_monitor(local);
}
ieee80211_adjust_monitor_flags(sdata, -1);
- ieee80211_configure_filter(local);
- mutex_lock(&local->mtx);
- ieee80211_recalc_idle(local);
- mutex_unlock(&local->mtx);
break;
case NL80211_IFTYPE_P2P_DEVICE:
/* relies on synchronize_rcu() below */
@@ -844,46 +861,28 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
*
* sta_info_flush_cleanup() requires rcu_barrier()
* first to wait for the station call_rcu() calls
- * to complete, here we need at least sychronize_rcu()
- * it to wait for the RX path in case it is using the
+ * to complete, and we also need synchronize_rcu()
+ * to wait for the RX path in case it is using the
* interface and enqueuing frames at this very time on
* another CPU.
*/
+ synchronize_rcu();
rcu_barrier();
sta_info_flush_cleanup(sdata);
- skb_queue_purge(&sdata->skb_queue);
-
/*
* Free all remaining keys, there shouldn't be any,
- * except maybe group keys in AP more or WDS?
+ * except maybe in WDS mode?
*/
ieee80211_free_keys(sdata);
- drv_remove_interface_debugfs(local, sdata);
-
- if (going_down)
- drv_remove_interface(local, sdata);
+ /* fall through */
+ case NL80211_IFTYPE_AP:
+ skb_queue_purge(&sdata->skb_queue);
}
sdata->bss = NULL;
- ieee80211_recalc_ps(local, -1);
-
- if (local->open_count == 0) {
- if (local->ops->napi_poll)
- napi_disable(&local->napi);
- ieee80211_clear_tx_pending(local);
- ieee80211_stop_device(local);
-
- /* no reconfiguring after stop! */
- hw_reconf_flags = 0;
- }
-
- /* do after stop to avoid reconfiguring when we stop anyway */
- if (hw_reconf_flags)
- ieee80211_hw_config(local, hw_reconf_flags);
-
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -896,7 +895,54 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
- if (local->monitors == local->open_count && local->monitors > 0)
+ if (local->open_count == 0)
+ ieee80211_clear_tx_pending(local);
+
+ /*
+ * If the interface goes down while suspended, presumably because
+ * the device was unplugged and that happens before our resume,
+ * then the driver is already unconfigured and the remainder of
+ * this function isn't needed.
+ * XXX: what about WoWLAN? If the device has software state, e.g.
+ * memory allocated, it might expect teardown commands from
+ * mac80211 here?
+ */
+ if (local->suspended) {
+ WARN_ON(local->wowlan);
+ WARN_ON(rtnl_dereference(local->monitor_sdata));
+ return;
+ }
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ break;
+ case NL80211_IFTYPE_MONITOR:
+ if (local->monitors == 0)
+ ieee80211_del_virtual_monitor(local);
+
+ mutex_lock(&local->mtx);
+ ieee80211_recalc_idle(local);
+ mutex_unlock(&local->mtx);
+ break;
+ default:
+ if (going_down)
+ drv_remove_interface(local, sdata);
+ }
+
+ ieee80211_recalc_ps(local, -1);
+
+ if (local->open_count == 0) {
+ ieee80211_stop_device(local);
+
+ /* no reconfiguring after stop! */
+ return;
+ }
+
+ /* do after stop to avoid reconfiguring when we stop anyway */
+ ieee80211_configure_filter(local);
+ ieee80211_hw_config(local, hw_reconf_flags);
+
+ if (local->monitors == local->open_count)
ieee80211_add_virtual_monitor(local);
}
@@ -935,6 +981,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
atomic_dec(&local->iff_promiscs);
sdata->flags ^= IEEE80211_SDATA_PROMISC;
}
+
+ /*
+ * TODO: If somebody needs this on AP interfaces,
+ * it can be enabled easily but multicast
+ * addresses from VLANs need to be synced.
+ */
+ if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
+ sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+ sdata->vif.type != NL80211_IFTYPE_AP)
+ drv_set_multicast_list(local, sdata, &dev->mc);
+
spin_lock_bh(&local->filter_lock);
__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
spin_unlock_bh(&local->filter_lock);
@@ -1433,7 +1490,17 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
break;
}
+ /*
+ * Pick address of existing interface in case user changed
+ * MAC address manually, default to perm_addr.
+ */
m = local->hw.wiphy->perm_addr;
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ continue;
+ m = sdata->vif.addr;
+ break;
+ }
start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
@@ -1561,6 +1628,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk);
INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work,
ieee80211_dfs_cac_timer_work);
+ INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk,
+ ieee80211_delayed_tailroom_dec);
for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
struct ieee80211_supported_band *sband;
@@ -1648,6 +1717,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
ASSERT_RTNL();
+ /*
+ * Close all AP_VLAN interfaces first, as otherwise they
+ * might be closed while the AP interface they belong to
+ * is closed, causing unregister_netdevice_many() to crash.
+ */
+ list_for_each_entry(sdata, &local->interfaces, list)
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ dev_close(sdata->dev);
+
mutex_lock(&local->iflist_mtx);
list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
list_del(&sdata->list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ef252eb58c36..67059b88fea5 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -248,11 +248,11 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
}
-static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
- bool pairwise,
- struct ieee80211_key *old,
- struct ieee80211_key *new)
+static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ bool pairwise,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
{
int idx;
bool defunikey, defmultikey, defmgmtkey;
@@ -397,7 +397,41 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
return key;
}
-static void __ieee80211_key_destroy(struct ieee80211_key *key)
+static void ieee80211_key_free_common(struct ieee80211_key *key)
+{
+ if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
+ ieee80211_aes_key_free(key->u.ccmp.tfm);
+ if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
+ ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
+ kfree(key);
+}
+
+static void __ieee80211_key_destroy(struct ieee80211_key *key,
+ bool delay_tailroom)
+{
+ if (key->local)
+ ieee80211_key_disable_hw_accel(key);
+
+ if (key->local) {
+ struct ieee80211_sub_if_data *sdata = key->sdata;
+
+ ieee80211_debugfs_key_remove(key);
+
+ if (delay_tailroom) {
+ /* see ieee80211_delayed_tailroom_dec */
+ sdata->crypto_tx_tailroom_pending_dec++;
+ schedule_delayed_work(&sdata->dec_tailroom_needed_wk,
+ HZ/2);
+ } else {
+ sdata->crypto_tx_tailroom_needed_cnt--;
+ }
+ }
+
+ ieee80211_key_free_common(key);
+}
+
+static void ieee80211_key_destroy(struct ieee80211_key *key,
+ bool delay_tailroom)
{
if (!key)
return;
@@ -408,19 +442,13 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
*/
synchronize_net();
- if (key->local)
- ieee80211_key_disable_hw_accel(key);
-
- if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
- ieee80211_aes_key_free(key->u.ccmp.tfm);
- if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
- ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
- if (key->local) {
- ieee80211_debugfs_key_remove(key);
- key->sdata->crypto_tx_tailroom_needed_cnt--;
- }
+ __ieee80211_key_destroy(key, delay_tailroom);
+}
- kfree(key);
+void ieee80211_key_free_unused(struct ieee80211_key *key)
+{
+ WARN_ON(key->sdata || key->local);
+ ieee80211_key_free_common(key);
}
int ieee80211_key_link(struct ieee80211_key *key,
@@ -440,32 +468,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
key->sdata = sdata;
key->sta = sta;
- if (sta) {
- /*
- * some hardware cannot handle TKIP with QoS, so
- * we indicate whether QoS could be in use.
- */
- if (test_sta_flag(sta, WLAN_STA_WME))
- key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
- } else {
- if (sdata->vif.type == NL80211_IFTYPE_STATION) {
- struct sta_info *ap;
-
- /*
- * We're getting a sta pointer in, so must be under
- * appropriate locking for sta_info_get().
- */
-
- /* same here, the AP could be using QoS */
- ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid);
- if (ap) {
- if (test_sta_flag(ap, WLAN_STA_WME))
- key->conf.flags |=
- IEEE80211_KEY_FLAG_WMM_STA;
- }
- }
- }
-
mutex_lock(&sdata->local->key_mtx);
if (sta && pairwise)
@@ -477,19 +479,22 @@ int ieee80211_key_link(struct ieee80211_key *key,
increment_tailroom_need_count(sdata);
- __ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
- __ieee80211_key_destroy(old_key);
+ ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
+ ieee80211_key_destroy(old_key, true);
ieee80211_debugfs_key_add(key);
ret = ieee80211_key_enable_hw_accel(key);
+ if (ret)
+ ieee80211_key_free(key, true);
+
mutex_unlock(&sdata->local->key_mtx);
return ret;
}
-void __ieee80211_key_free(struct ieee80211_key *key)
+void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
{
if (!key)
return;
@@ -498,18 +503,10 @@ void __ieee80211_key_free(struct ieee80211_key *key)
* Replace key with nothingness if it was ever used.
*/
if (key->sdata)
- __ieee80211_key_replace(key->sdata, key->sta,
+ ieee80211_key_replace(key->sdata, key->sta,
key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
key, NULL);
- __ieee80211_key_destroy(key);
-}
-
-void ieee80211_key_free(struct ieee80211_local *local,
- struct ieee80211_key *key)
-{
- mutex_lock(&local->key_mtx);
- __ieee80211_key_free(key);
- mutex_unlock(&local->key_mtx);
+ ieee80211_key_destroy(key, delay_tailroom);
}
void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
@@ -566,36 +563,109 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_iter_keys);
-void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata)
+void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
{
- struct ieee80211_key *key;
+ struct ieee80211_key *key, *tmp;
+ LIST_HEAD(keys);
- ASSERT_RTNL();
+ cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk);
mutex_lock(&sdata->local->key_mtx);
- list_for_each_entry(key, &sdata->key_list, list)
- ieee80211_key_disable_hw_accel(key);
+ sdata->crypto_tx_tailroom_needed_cnt -=
+ sdata->crypto_tx_tailroom_pending_dec;
+ sdata->crypto_tx_tailroom_pending_dec = 0;
+
+ ieee80211_debugfs_key_remove_mgmt_default(sdata);
+
+ list_for_each_entry_safe(key, tmp, &sdata->key_list, list) {
+ ieee80211_key_replace(key->sdata, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
+ list_add_tail(&key->list, &keys);
+ }
+
+ ieee80211_debugfs_key_update_default(sdata);
+
+ if (!list_empty(&keys)) {
+ synchronize_net();
+ list_for_each_entry_safe(key, tmp, &keys, list)
+ __ieee80211_key_destroy(key, false);
+ }
+
+ WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+ sdata->crypto_tx_tailroom_pending_dec);
mutex_unlock(&sdata->local->key_mtx);
}
-void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
+void ieee80211_free_sta_keys(struct ieee80211_local *local,
+ struct sta_info *sta)
{
struct ieee80211_key *key, *tmp;
+ LIST_HEAD(keys);
+ int i;
- mutex_lock(&sdata->local->key_mtx);
+ mutex_lock(&local->key_mtx);
+ for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+ key = key_mtx_dereference(local, sta->gtk[i]);
+ if (!key)
+ continue;
+ ieee80211_key_replace(key->sdata, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
+ list_add(&key->list, &keys);
+ }
- ieee80211_debugfs_key_remove_mgmt_default(sdata);
+ key = key_mtx_dereference(local, sta->ptk);
+ if (key) {
+ ieee80211_key_replace(key->sdata, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
+ list_add(&key->list, &keys);
+ }
- list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
- __ieee80211_key_free(key);
+ /*
+ * NB: the station code relies on this being
+ * done even if there aren't any keys
+ */
+ synchronize_net();
- ieee80211_debugfs_key_update_default(sdata);
+ list_for_each_entry_safe(key, tmp, &keys, list)
+ __ieee80211_key_destroy(key, true);
- mutex_unlock(&sdata->local->key_mtx);
+ mutex_unlock(&local->key_mtx);
}
+void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
+{
+ struct ieee80211_sub_if_data *sdata;
+
+ sdata = container_of(wk, struct ieee80211_sub_if_data,
+ dec_tailroom_needed_wk.work);
+
+ /*
+ * The reason for the delayed tailroom needed decrementing is to
+ * make roaming faster: during roaming, all keys are first deleted
+ * and then new keys are installed. The first new key causes the
+ * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes
+ * the cost of synchronize_net() (which can be slow). Avoid this
+ * by deferring the crypto_tx_tailroom_needed_cnt decrementing on
+ * key removal for a while, so if we roam the value is larger than
+ * zero and no 0->1 transition happens.
+ *
+ * The cost is that if the AP switching was from an AP with keys
+ * to one without, we still allocate tailroom while it would no
+ * longer be needed. However, in the typical (fast) roaming case
+ * within an ESS this usually won't happen.
+ */
+
+ mutex_lock(&sdata->local->key_mtx);
+ sdata->crypto_tx_tailroom_needed_cnt -=
+ sdata->crypto_tx_tailroom_pending_dec;
+ sdata->crypto_tx_tailroom_pending_dec = 0;
+ mutex_unlock(&sdata->local->key_mtx);
+}
void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,
const u8 *replay_ctr, gfp_t gfp)
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 382dc44ed330..e8de3e6d7804 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -129,23 +129,25 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
size_t seq_len, const u8 *seq);
/*
* Insert a key into data structures (sdata, sta if necessary)
- * to make it used, free old key.
+ * to make it used, free old key. On failure, also free the new key.
*/
-int __must_check ieee80211_key_link(struct ieee80211_key *key,
- struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta);
-void __ieee80211_key_free(struct ieee80211_key *key);
-void ieee80211_key_free(struct ieee80211_local *local,
- struct ieee80211_key *key);
+int ieee80211_key_link(struct ieee80211_key *key,
+ struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta);
+void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom);
+void ieee80211_key_free_unused(struct ieee80211_key *key);
void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
bool uni, bool multi);
void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
int idx);
void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
+void ieee80211_free_sta_keys(struct ieee80211_local *local,
+ struct sta_info *sta);
void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
-void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
#define key_mtx_dereference(local, ref) \
rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
+void ieee80211_delayed_tailroom_dec(struct work_struct *wk);
+
#endif /* IEEE80211_KEY_H */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1a8591b77a13..8eae74ac4e1e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -95,43 +95,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work)
static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- struct ieee80211_channel *chan;
+ struct cfg80211_chan_def chandef = {};
u32 changed = 0;
int power;
- enum nl80211_channel_type channel_type;
u32 offchannel_flag;
- bool scanning = false;
offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
+
if (local->scan_channel) {
- chan = local->scan_channel;
+ chandef.chan = local->scan_channel;
/* If scanning on oper channel, use whatever channel-type
* is currently in use.
*/
- if (chan == local->_oper_channel)
- channel_type = local->_oper_channel_type;
- else
- channel_type = NL80211_CHAN_NO_HT;
+ if (chandef.chan == local->_oper_chandef.chan) {
+ chandef = local->_oper_chandef;
+ } else {
+ chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
+ chandef.center_freq1 = chandef.chan->center_freq;
+ }
} else if (local->tmp_channel) {
- chan = local->tmp_channel;
- channel_type = NL80211_CHAN_NO_HT;
- } else {
- chan = local->_oper_channel;
- channel_type = local->_oper_channel_type;
- }
-
- if (chan != local->_oper_channel ||
- channel_type != local->_oper_channel_type)
+ chandef.chan = local->tmp_channel;
+ chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
+ chandef.center_freq1 = chandef.chan->center_freq;
+ } else
+ chandef = local->_oper_chandef;
+
+ WARN(!cfg80211_chandef_valid(&chandef),
+ "control:%d MHz width:%d center: %d/%d MHz",
+ chandef.chan->center_freq, chandef.width,
+ chandef.center_freq1, chandef.center_freq2);
+
+ if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef))
local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
else
local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
- if (offchannel_flag || chan != local->hw.conf.channel ||
- channel_type != local->hw.conf.channel_type) {
- local->hw.conf.channel = chan;
- local->hw.conf.channel_type = channel_type;
+ if (offchannel_flag ||
+ !cfg80211_chandef_identical(&local->hw.conf.chandef,
+ &local->_oper_chandef)) {
+ local->hw.conf.chandef = chandef;
changed |= IEEE80211_CONF_CHANGE_CHANNEL;
}
@@ -147,10 +151,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
changed |= IEEE80211_CONF_CHANGE_SMPS;
}
- scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
- test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) ||
- test_bit(SCAN_HW_SCANNING, &local->scanning);
- power = chan->max_power;
+ power = chandef.chan->max_power;
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -226,8 +227,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
static void ieee80211_tasklet_handler(unsigned long data)
{
struct ieee80211_local *local = (struct ieee80211_local *) data;
- struct sta_info *sta, *tmp;
- struct skb_eosp_msg_data *eosp_data;
struct sk_buff *skb;
while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -243,18 +242,6 @@ static void ieee80211_tasklet_handler(unsigned long data)
skb->pkt_type = 0;
ieee80211_tx_status(&local->hw, skb);
break;
- case IEEE80211_EOSP_MSG:
- eosp_data = (void *)skb->cb;
- for_each_sta_info(local, eosp_data->sta, sta, tmp) {
- /* skip wrong virtual interface */
- if (memcmp(eosp_data->iface,
- sta->sdata->vif.addr, ETH_ALEN))
- continue;
- clear_sta_flag(sta, WLAN_STA_SP);
- break;
- }
- dev_kfree_skb(skb);
- break;
default:
WARN(1, "mac80211: Packet is of unknown type %d\n",
skb->pkt_type);
@@ -295,8 +282,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
"Hardware restart was requested\n");
/* use this reason, ieee80211_reconfig will unblock it */
- ieee80211_stop_queues_by_reason(hw,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND);
/*
* Stop all Rx during the reconfig. We don't want state changes
@@ -399,30 +386,6 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb,
}
#endif
-static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
-{
- struct ieee80211_local *local =
- container_of(napi, struct ieee80211_local, napi);
-
- return local->ops->napi_poll(&local->hw, budget);
-}
-
-void ieee80211_napi_schedule(struct ieee80211_hw *hw)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- napi_schedule(&local->napi);
-}
-EXPORT_SYMBOL(ieee80211_napi_schedule);
-
-void ieee80211_napi_complete(struct ieee80211_hw *hw)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- napi_complete(&local->napi);
-}
-EXPORT_SYMBOL(ieee80211_napi_complete);
-
/* There isn't a lot of sense in it, but you can transmit anything you like */
static const struct ieee80211_txrx_stypes
ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
@@ -501,6 +464,27 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {
},
};
+static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = {
+ .vht_cap_info =
+ cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC |
+ IEEE80211_VHT_CAP_SHORT_GI_80 |
+ IEEE80211_VHT_CAP_SHORT_GI_160 |
+ IEEE80211_VHT_CAP_RXSTBC_1 |
+ IEEE80211_VHT_CAP_RXSTBC_2 |
+ IEEE80211_VHT_CAP_RXSTBC_3 |
+ IEEE80211_VHT_CAP_RXSTBC_4 |
+ IEEE80211_VHT_CAP_TXSTBC |
+ IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
+ IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+ IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN |
+ IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN |
+ IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK),
+ .supp_mcs = {
+ .rx_mcs_map = cpu_to_le16(~0),
+ .tx_mcs_map = cpu_to_le16(~0),
+ },
+};
+
static const u8 extended_capabilities[] = {
0, 0, 0, 0, 0, 0, 0,
WLAN_EXT_CAPA8_OPMODE_NOTIF,
@@ -572,7 +556,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
wiphy->features |= NL80211_FEATURE_SK_TX_STATUS |
NL80211_FEATURE_SAE |
NL80211_FEATURE_HT_IBSS |
- NL80211_FEATURE_VIF_TXPOWER;
+ NL80211_FEATURE_VIF_TXPOWER |
+ NL80211_FEATURE_USERSPACE_MPM;
if (!ops->hw_scan)
wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -607,8 +592,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
IEEE80211_RADIOTAP_MCS_HAVE_BW;
local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI |
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH;
+ local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
+ local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask;
+ wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask;
INIT_LIST_HEAD(&local->interfaces);
@@ -664,9 +652,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
skb_queue_head_init(&local->skb_queue);
skb_queue_head_init(&local->skb_queue_unreliable);
- /* init dummy netdev for use w/ NAPI */
- init_dummy_netdev(&local->napi_dev);
-
ieee80211_led_names(local);
ieee80211_roc_setup(local);
@@ -683,6 +668,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
int channels, max_bitrates;
bool supp_ht, supp_vht;
netdev_features_t feature_whitelist;
+ struct cfg80211_chan_def dflt_chandef = {};
static const u32 cipher_suites[] = {
/* keep WEP first, it may be removed below */
WLAN_CIPHER_SUITE_WEP40,
@@ -760,15 +746,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
sband = local->hw.wiphy->bands[band];
if (!sband)
continue;
- if (!local->use_chanctx && !local->_oper_channel) {
+
+ if (!dflt_chandef.chan) {
+ cfg80211_chandef_create(&dflt_chandef,
+ &sband->channels[0],
+ NL80211_CHAN_NO_HT);
/* init channel we're on */
- local->hw.conf.channel =
- local->_oper_channel = &sband->channels[0];
- local->hw.conf.channel_type = NL80211_CHAN_NO_HT;
+ if (!local->use_chanctx && !local->_oper_chandef.chan) {
+ local->hw.conf.chandef = dflt_chandef;
+ local->_oper_chandef = dflt_chandef;
+ }
+ local->monitor_chandef = dflt_chandef;
}
- cfg80211_chandef_create(&local->monitor_chandef,
- &sband->channels[0],
- NL80211_CHAN_NO_HT);
+
channels += sband->n_channels;
if (max_bitrates < sband->n_bitrates)
@@ -851,22 +841,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (supp_ht)
local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap);
- if (supp_vht) {
+ if (supp_vht)
local->scan_ies_len +=
2 + sizeof(struct ieee80211_vht_cap);
- /*
- * (for now at least), drivers wanting to use VHT must
- * support channel contexts, as they contain all the
- * necessary VHT information and the global hw config
- * doesn't (yet)
- */
- if (WARN_ON(!local->use_chanctx)) {
- result = -EINVAL;
- goto fail_wiphy_register;
- }
- }
-
if (!local->ops->hw_scan) {
/* For hw_scan, driver needs to set these up. */
local->hw.wiphy->max_scan_ssids = 4;
@@ -943,7 +921,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
hw->queues = IEEE80211_MAX_QUEUES;
local->workqueue =
- alloc_ordered_workqueue(wiphy_name(local->hw.wiphy), 0);
+ alloc_ordered_workqueue("%s", 0, wiphy_name(local->hw.wiphy));
if (!local->workqueue) {
result = -ENOMEM;
goto fail_workqueue;
@@ -1021,9 +999,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
goto fail_ifa6;
#endif
- netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
- local->hw.napi_weight);
-
return 0;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 4749b3858695..6952760881c8 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -13,10 +13,6 @@
#include "ieee80211_i.h"
#include "mesh.h"
-#define TMR_RUNNING_HK 0
-#define TMR_RUNNING_MP 1
-#define TMR_RUNNING_MPR 2
-
static int mesh_allocated;
static struct kmem_cache *rm_cache;
@@ -50,11 +46,6 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data)
set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
- if (local->quiescing) {
- set_bit(TMR_RUNNING_HK, &ifmsh->timers_running);
- return;
- }
-
ieee80211_queue_work(&local->hw, &sdata->work);
}
@@ -165,7 +156,7 @@ void mesh_sta_cleanup(struct sta_info *sta)
* an update.
*/
changed = mesh_accept_plinks_update(sdata);
- if (sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) {
+ if (!sdata->u.mesh.user_mpm) {
changed |= mesh_plink_deactivate(sta);
del_timer_sync(&sta->plink_timer);
}
@@ -479,15 +470,8 @@ static void ieee80211_mesh_path_timer(unsigned long data)
{
struct ieee80211_sub_if_data *sdata =
(struct ieee80211_sub_if_data *) data;
- struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct ieee80211_local *local = sdata->local;
-
- if (local->quiescing) {
- set_bit(TMR_RUNNING_MP, &ifmsh->timers_running);
- return;
- }
- ieee80211_queue_work(&local->hw, &sdata->work);
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
static void ieee80211_mesh_path_root_timer(unsigned long data)
@@ -495,16 +479,10 @@ static void ieee80211_mesh_path_root_timer(unsigned long data)
struct ieee80211_sub_if_data *sdata =
(struct ieee80211_sub_if_data *) data;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct ieee80211_local *local = sdata->local;
set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
- if (local->quiescing) {
- set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running);
- return;
- }
-
- ieee80211_queue_work(&local->hw, &sdata->work);
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh)
@@ -622,35 +600,6 @@ static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata)
round_jiffies(TU_TO_EXP_TIME(interval)));
}
-#ifdef CONFIG_PM
-void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-
- /* use atomic bitops in case all timers fire at the same time */
-
- if (del_timer_sync(&ifmsh->housekeeping_timer))
- set_bit(TMR_RUNNING_HK, &ifmsh->timers_running);
- if (del_timer_sync(&ifmsh->mesh_path_timer))
- set_bit(TMR_RUNNING_MP, &ifmsh->timers_running);
- if (del_timer_sync(&ifmsh->mesh_path_root_timer))
- set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running);
-}
-
-void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-
- if (test_and_clear_bit(TMR_RUNNING_HK, &ifmsh->timers_running))
- add_timer(&ifmsh->housekeeping_timer);
- if (test_and_clear_bit(TMR_RUNNING_MP, &ifmsh->timers_running))
- add_timer(&ifmsh->mesh_path_timer);
- if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running))
- add_timer(&ifmsh->mesh_path_root_timer);
- ieee80211_mesh_root_setup(ifmsh);
-}
-#endif
-
static int
ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
{
@@ -750,10 +699,8 @@ out_free:
static int
ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh)
{
- struct ieee80211_sub_if_data *sdata;
struct beacon_data *old_bcn;
int ret;
- sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
mutex_lock(&ifmsh->mtx);
@@ -871,8 +818,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
local->fif_other_bss--;
atomic_dec(&local->iff_allmultis);
ieee80211_configure_filter(local);
-
- sdata->u.mesh.timers_running = 0;
}
static void
@@ -886,15 +831,14 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *hdr;
struct ieee802_11_elems elems;
size_t baselen;
- u8 *pos, *end;
+ u8 *pos;
- end = ((u8 *) mgmt) + len;
pos = mgmt->u.probe_req.variable;
baselen = (u8 *) pos - (u8 *) mgmt;
if (baselen > len)
return;
- ieee802_11_parse_elems(pos, len - baselen, &elems);
+ ieee802_11_parse_elems(pos, len - baselen, false, &elems);
/* 802.11-2012 10.1.4.3.2 */
if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) &&
@@ -955,7 +899,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
return;
ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
- &elems);
+ false, &elems);
/* ignore non-mesh or secure / unsecure mismatch */
if ((!elems.mesh_id || !elems.mesh_config) ||
@@ -963,7 +907,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
(!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
return;
- if (elems.ds_params && elems.ds_params_len == 1)
+ if (elems.ds_params)
freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
else
freq = rx_status->freq;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 336c88a16687..da158774eebb 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,7 +275,8 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop);
void mesh_path_expire(struct ieee80211_sub_if_data *sdata);
void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len);
-int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst);
+struct mesh_path *
+mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst);
int mesh_path_add_gate(struct mesh_path *mpath);
int mesh_path_send_to_gates(struct mesh_path *mpath);
@@ -313,8 +314,6 @@ void mesh_path_timer(unsigned long data);
void mesh_path_flush_by_nexthop(struct sta_info *sta);
void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
-void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata);
-void mesh_path_restart(struct ieee80211_sub_if_data *sdata);
void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata);
bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt);
@@ -359,22 +358,12 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata);
-void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata);
-void mesh_plink_quiesce(struct sta_info *sta);
-void mesh_plink_restart(struct sta_info *sta);
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
void ieee80211s_stop(void);
#else
static inline void
ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
-static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata)
-{}
-static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
-{}
-static inline void mesh_plink_quiesce(struct sta_info *sta) {}
-static inline void mesh_plink_restart(struct sta_info *sta) {}
static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
{ return false; }
static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index bdb8d3b14587..486819cd02cd 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -144,7 +144,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
*pos++ = WLAN_EID_PREQ;
break;
case MPATH_PREP:
- mhwmp_dbg(sdata, "sending PREP to %pM\n", target);
+ mhwmp_dbg(sdata, "sending PREP to %pM\n", orig_addr);
ie_len = 31;
pos = skb_put(skb, 2 + ie_len);
*pos++ = WLAN_EID_PREP;
@@ -445,9 +445,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
}
}
} else {
- mesh_path_add(sdata, orig_addr);
- mpath = mesh_path_lookup(sdata, orig_addr);
- if (!mpath) {
+ mpath = mesh_path_add(sdata, orig_addr);
+ if (IS_ERR(mpath)) {
rcu_read_unlock();
return 0;
}
@@ -486,9 +485,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
(last_hop_metric > mpath->metric)))
fresh_info = false;
} else {
- mesh_path_add(sdata, ta);
- mpath = mesh_path_lookup(sdata, ta);
- if (!mpath) {
+ mpath = mesh_path_add(sdata, ta);
+ if (IS_ERR(mpath)) {
rcu_read_unlock();
return 0;
}
@@ -661,7 +659,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
u32 target_sn, orig_sn, lifetime;
mhwmp_dbg(sdata, "received PREP from %pM\n",
- PREP_IE_ORIG_ADDR(prep_elem));
+ PREP_IE_TARGET_ADDR(prep_elem));
orig_addr = PREP_IE_ORIG_ADDR(prep_elem);
if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -804,9 +802,8 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
mpath = mesh_path_lookup(sdata, orig_addr);
if (!mpath) {
- mesh_path_add(sdata, orig_addr);
- mpath = mesh_path_lookup(sdata, orig_addr);
- if (!mpath) {
+ mpath = mesh_path_add(sdata, orig_addr);
+ if (IS_ERR(mpath)) {
rcu_read_unlock();
sdata->u.mesh.mshstats.dropped_frames_no_route++;
return;
@@ -883,7 +880,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
- len - baselen, &elems);
+ len - baselen, false, &elems);
if (elems.preq) {
if (elems.preq_len != 37)
@@ -1098,11 +1095,10 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
/* no nexthop found, start resolving */
mpath = mesh_path_lookup(sdata, target_addr);
if (!mpath) {
- mesh_path_add(sdata, target_addr);
- mpath = mesh_path_lookup(sdata, target_addr);
- if (!mpath) {
+ mpath = mesh_path_add(sdata, target_addr);
+ if (IS_ERR(mpath)) {
mesh_path_discard_frame(sdata, skb);
- err = -ENOSPC;
+ err = PTR_ERR(mpath);
goto endlookup;
}
}
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index dc7c8df40c2c..89aacfd2756d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -493,7 +493,8 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata)
*
* State: the initial state of the new path is set to 0
*/
-int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
+struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
+ const u8 *dst)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_local *local = sdata->local;
@@ -502,18 +503,33 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
struct mpath_node *node, *new_node;
struct hlist_head *bucket;
int grow = 0;
- int err = 0;
+ int err;
u32 hash_idx;
if (ether_addr_equal(dst, sdata->vif.addr))
/* never add ourselves as neighbours */
- return -ENOTSUPP;
+ return ERR_PTR(-ENOTSUPP);
if (is_multicast_ether_addr(dst))
- return -ENOTSUPP;
+ return ERR_PTR(-ENOTSUPP);
if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0)
- return -ENOSPC;
+ return ERR_PTR(-ENOSPC);
+
+ read_lock_bh(&pathtbl_resize_lock);
+ tbl = resize_dereference_mesh_paths();
+
+ hash_idx = mesh_table_hash(dst, sdata, tbl);
+ bucket = &tbl->hash_buckets[hash_idx];
+
+ spin_lock(&tbl->hashwlock[hash_idx]);
+
+ hlist_for_each_entry(node, bucket, list) {
+ mpath = node->mpath;
+ if (mpath->sdata == sdata &&
+ ether_addr_equal(dst, mpath->dst))
+ goto found;
+ }
err = -ENOMEM;
new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC);
@@ -524,7 +540,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
if (!new_node)
goto err_node_alloc;
- read_lock_bh(&pathtbl_resize_lock);
memcpy(new_mpath->dst, dst, ETH_ALEN);
eth_broadcast_addr(new_mpath->rann_snd_addr);
new_mpath->is_root = false;
@@ -538,21 +553,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
spin_lock_init(&new_mpath->state_lock);
init_timer(&new_mpath->timer);
- tbl = resize_dereference_mesh_paths();
-
- hash_idx = mesh_table_hash(dst, sdata, tbl);
- bucket = &tbl->hash_buckets[hash_idx];
-
- spin_lock(&tbl->hashwlock[hash_idx]);
-
- err = -EEXIST;
- hlist_for_each_entry(node, bucket, list) {
- mpath = node->mpath;
- if (mpath->sdata == sdata &&
- ether_addr_equal(dst, mpath->dst))
- goto err_exists;
- }
-
hlist_add_head_rcu(&new_node->list, bucket);
if (atomic_inc_return(&tbl->entries) >=
tbl->mean_chain_len * (tbl->hash_mask + 1))
@@ -560,23 +560,23 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
mesh_paths_generation++;
- spin_unlock(&tbl->hashwlock[hash_idx]);
- read_unlock_bh(&pathtbl_resize_lock);
if (grow) {
set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags);
ieee80211_queue_work(&local->hw, &sdata->work);
}
- return 0;
-
-err_exists:
+ mpath = new_mpath;
+found:
spin_unlock(&tbl->hashwlock[hash_idx]);
read_unlock_bh(&pathtbl_resize_lock);
- kfree(new_node);
+ return mpath;
+
err_node_alloc:
kfree(new_mpath);
err_path_alloc:
atomic_dec(&sdata->u.mesh.mpaths);
- return err;
+ spin_unlock(&tbl->hashwlock[hash_idx]);
+ read_unlock_bh(&pathtbl_resize_lock);
+ return ERR_PTR(err);
}
static void mesh_table_free_rcu(struct rcu_head *rcu)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 07d396d57079..09bebed99416 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -420,7 +420,6 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)
return NULL;
sta->plink_state = NL80211_PLINK_LISTEN;
- init_timer(&sta->plink_timer);
sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
@@ -437,8 +436,9 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
{
struct sta_info *sta = NULL;
- /* Userspace handles peer allocation when security is enabled */
- if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)
+ /* Userspace handles station allocation */
+ if (sdata->u.mesh.user_mpm ||
+ sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)
cfg80211_notify_new_peer_candidate(sdata->dev, addr,
elems->ie_start,
elems->total_len,
@@ -534,10 +534,8 @@ static void mesh_plink_timer(unsigned long data)
*/
sta = (struct sta_info *) data;
- if (sta->sdata->local->quiescing) {
- sta->plink_timer_was_running = true;
+ if (sta->sdata->local->quiescing)
return;
- }
spin_lock_bh(&sta->lock);
if (sta->ignore_plink_timer) {
@@ -546,8 +544,8 @@ static void mesh_plink_timer(unsigned long data)
return;
}
mpl_dbg(sta->sdata,
- "Mesh plink timer for %pM fired on state %d\n",
- sta->sta.addr, sta->plink_state);
+ "Mesh plink timer for %pM fired on state %s\n",
+ sta->sta.addr, mplstates[sta->plink_state]);
reason = 0;
llid = sta->llid;
plid = sta->plid;
@@ -598,29 +596,6 @@ static void mesh_plink_timer(unsigned long data)
}
}
-#ifdef CONFIG_PM
-void mesh_plink_quiesce(struct sta_info *sta)
-{
- if (!ieee80211_vif_is_mesh(&sta->sdata->vif))
- return;
-
- /* no kernel mesh sta timers have been initialized */
- if (sta->sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
- return;
-
- if (del_timer_sync(&sta->plink_timer))
- sta->plink_timer_was_running = true;
-}
-
-void mesh_plink_restart(struct sta_info *sta)
-{
- if (sta->plink_timer_was_running) {
- add_timer(&sta->plink_timer);
- sta->plink_timer_was_running = false;
- }
-}
-#endif
-
static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout)
{
sta->plink_timer.expires = jiffies + (HZ * timeout / 1000);
@@ -695,6 +670,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
if (len < IEEE80211_MIN_ACTION_SIZE + 3)
return;
+ if (sdata->u.mesh.user_mpm)
+ /* userspace must register for these */
+ return;
+
if (is_multicast_ether_addr(mgmt->da)) {
mpl_dbg(sdata,
"Mesh plink: ignore frame from multicast address\n");
@@ -708,7 +687,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
baseaddr += 4;
baselen += 4;
}
- ieee802_11_parse_elems(baseaddr, len - baselen, &elems);
+ ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);
if (!elems.peering) {
mpl_dbg(sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 346ad4cfb013..741448b30825 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -56,7 +56,10 @@ MODULE_PARM_DESC(max_probe_tries,
* probe on beacon miss before declaring the connection lost
* default to what we want.
*/
-#define IEEE80211_BEACON_LOSS_COUNT 7
+static int beacon_loss_count = 7;
+module_param(beacon_loss_count, int, 0644);
+MODULE_PARM_DESC(beacon_loss_count,
+ "Number of beacon intervals before we decide beacon was lost.");
/*
* Time the connection can be idle before we probe
@@ -87,9 +90,6 @@ MODULE_PARM_DESC(probe_wait_ms,
*/
#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
-#define TMR_RUNNING_TIMER 0
-#define TMR_RUNNING_CHANSW 1
-
/*
* All cfg80211 functions have to be called outside a locked
* section so that they can acquire a lock themselves... This
@@ -289,6 +289,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
} else {
/* 40 MHz (and 80 MHz) must be supported for VHT */
ret = IEEE80211_STA_DISABLE_VHT;
+ /* also mark 40 MHz disabled */
+ ret |= IEEE80211_STA_DISABLE_40MHZ;
goto out;
}
@@ -303,12 +305,6 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
channel->band);
vht_chandef.center_freq2 = 0;
- if (vht_oper->center_freq_seg2_idx)
- vht_chandef.center_freq2 =
- ieee80211_channel_to_frequency(
- vht_oper->center_freq_seg2_idx,
- channel->band);
-
switch (vht_oper->chan_width) {
case IEEE80211_VHT_CHANWIDTH_USE_HT:
vht_chandef.width = chandef->width;
@@ -321,6 +317,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
break;
case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
+ vht_chandef.center_freq2 =
+ ieee80211_channel_to_frequency(
+ vht_oper->center_freq_seg2_idx,
+ channel->band);
break;
default:
if (verbose)
@@ -604,11 +604,11 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
u8 *pos;
u32 cap;
struct ieee80211_sta_vht_cap vht_cap;
- int i;
BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap));
memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap));
+ ieee80211_apply_vhtcap_overrides(sdata, &vht_cap);
/* determine capability flags */
cap = vht_cap.cap;
@@ -631,37 +631,6 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)))
cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
- if (!(ap_vht_cap->vht_cap_info &
- cpu_to_le32(IEEE80211_VHT_CAP_TXSTBC)))
- cap &= ~(IEEE80211_VHT_CAP_RXSTBC_1 |
- IEEE80211_VHT_CAP_RXSTBC_3 |
- IEEE80211_VHT_CAP_RXSTBC_4);
-
- for (i = 0; i < 8; i++) {
- int shift = i * 2;
- u16 mask = IEEE80211_VHT_MCS_NOT_SUPPORTED << shift;
- u16 ap_mcs, our_mcs;
-
- ap_mcs = (le16_to_cpu(ap_vht_cap->supp_mcs.tx_mcs_map) &
- mask) >> shift;
- our_mcs = (le16_to_cpu(vht_cap.vht_mcs.rx_mcs_map) &
- mask) >> shift;
-
- if (our_mcs == IEEE80211_VHT_MCS_NOT_SUPPORTED)
- continue;
-
- switch (ap_mcs) {
- default:
- if (our_mcs <= ap_mcs)
- break;
- /* fall through */
- case IEEE80211_VHT_MCS_NOT_SUPPORTED:
- vht_cap.vht_mcs.rx_mcs_map &= cpu_to_le16(~mask);
- vht_cap.vht_mcs.rx_mcs_map |=
- cpu_to_le16(ap_mcs << shift);
- }
- }
-
/* reserve and fill IE */
pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
@@ -987,6 +956,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work);
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
if (!ieee80211_sdata_running(sdata))
@@ -996,21 +966,22 @@ static void ieee80211_chswitch_work(struct work_struct *work)
if (!ifmgd->associated)
goto out;
- sdata->local->_oper_channel = sdata->local->csa_channel;
- if (!sdata->local->ops->channel_switch) {
+ local->_oper_chandef = local->csa_chandef;
+
+ if (!local->ops->channel_switch) {
/* call "hw_config" only if doing sw channel switch */
- ieee80211_hw_config(sdata->local,
- IEEE80211_CONF_CHANGE_CHANNEL);
+ ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
} else {
/* update the device channel directly */
- sdata->local->hw.conf.channel = sdata->local->_oper_channel;
+ local->hw.conf.chandef = local->_oper_chandef;
}
/* XXX: shouldn't really modify cfg80211-owned data! */
- ifmgd->associated->channel = sdata->local->_oper_channel;
+ ifmgd->associated->channel = local->_oper_chandef.chan;
/* XXX: wait for a beacon first? */
- ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
out:
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
@@ -1038,66 +1009,212 @@ static void ieee80211_chswitch_timer(unsigned long data)
{
struct ieee80211_sub_if_data *sdata =
(struct ieee80211_sub_if_data *) data;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-
- if (sdata->local->quiescing) {
- set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running);
- return;
- }
- ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
+ ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
}
-void
+static void
ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
- const struct ieee80211_channel_sw_ie *sw_elem,
- struct ieee80211_bss *bss, u64 timestamp)
+ u64 timestamp, struct ieee802_11_elems *elems,
+ bool beacon)
{
- struct cfg80211_bss *cbss =
- container_of((void *)bss, struct cfg80211_bss, priv);
- struct ieee80211_channel *new_ch;
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num,
- cbss->channel->band);
+ struct cfg80211_bss *cbss = ifmgd->associated;
+ struct ieee80211_bss *bss;
struct ieee80211_chanctx *chanctx;
+ enum ieee80211_band new_band;
+ int new_freq;
+ u8 new_chan_no;
+ u8 count;
+ u8 mode;
+ struct ieee80211_channel *new_chan;
+ struct cfg80211_chan_def new_chandef = {};
+ struct cfg80211_chan_def new_vht_chandef = {};
+ const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
+ const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
+ const struct ieee80211_ht_operation *ht_oper;
+ int secondary_channel_offset = -1;
ASSERT_MGD_MTX(ifmgd);
- if (!ifmgd->associated)
+ if (!cbss)
+ return;
+
+ if (local->scanning)
return;
- if (sdata->local->scanning)
+ /* disregard subsequent announcements if we are already processing */
+ if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)
return;
- /* Disregard subsequent beacons if we are already running a timer
- processing a CSA */
+ sec_chan_offs = elems->sec_chan_offs;
+ wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
+ ht_oper = elems->ht_operation;
- if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)
+ if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT |
+ IEEE80211_STA_DISABLE_40MHZ)) {
+ sec_chan_offs = NULL;
+ wide_bw_chansw_ie = NULL;
+ /* only used for bandwidth here */
+ ht_oper = NULL;
+ }
+
+ if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
+ wide_bw_chansw_ie = NULL;
+
+ if (elems->ext_chansw_ie) {
+ if (!ieee80211_operating_class_to_band(
+ elems->ext_chansw_ie->new_operating_class,
+ &new_band)) {
+ sdata_info(sdata,
+ "cannot understand ECSA IE operating class %d, disconnecting\n",
+ elems->ext_chansw_ie->new_operating_class);
+ ieee80211_queue_work(&local->hw,
+ &ifmgd->csa_connection_drop_work);
+ }
+ new_chan_no = elems->ext_chansw_ie->new_ch_num;
+ count = elems->ext_chansw_ie->count;
+ mode = elems->ext_chansw_ie->mode;
+ } else if (elems->ch_switch_ie) {
+ new_band = cbss->channel->band;
+ new_chan_no = elems->ch_switch_ie->new_ch_num;
+ count = elems->ch_switch_ie->count;
+ mode = elems->ch_switch_ie->mode;
+ } else {
+ /* nothing here we understand */
return;
+ }
- new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
- if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) {
+ bss = (void *)cbss->priv;
+
+ new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band);
+ new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
+ if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) {
sdata_info(sdata,
"AP %pM switches to unsupported channel (%d MHz), disconnecting\n",
ifmgd->associated->bssid, new_freq);
- ieee80211_queue_work(&sdata->local->hw,
+ ieee80211_queue_work(&local->hw,
+ &ifmgd->csa_connection_drop_work);
+ return;
+ }
+
+ if (!beacon && sec_chan_offs) {
+ secondary_channel_offset = sec_chan_offs->sec_chan_offs;
+ } else if (beacon && ht_oper) {
+ secondary_channel_offset =
+ ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
+ } else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ /*
+ * If it's not a beacon, HT is enabled and the IE not present,
+ * it's 20 MHz, 802.11-2012 8.5.2.6:
+ * This element [the Secondary Channel Offset Element] is
+ * present when switching to a 40 MHz channel. It may be
+ * present when switching to a 20 MHz channel (in which
+ * case the secondary channel offset is set to SCN).
+ */
+ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE;
+ }
+
+ switch (secondary_channel_offset) {
+ default:
+ /* secondary_channel_offset was present but is invalid */
+ case IEEE80211_HT_PARAM_CHA_SEC_NONE:
+ cfg80211_chandef_create(&new_chandef, new_chan,
+ NL80211_CHAN_HT20);
+ break;
+ case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+ cfg80211_chandef_create(&new_chandef, new_chan,
+ NL80211_CHAN_HT40PLUS);
+ break;
+ case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+ cfg80211_chandef_create(&new_chandef, new_chan,
+ NL80211_CHAN_HT40MINUS);
+ break;
+ case -1:
+ cfg80211_chandef_create(&new_chandef, new_chan,
+ NL80211_CHAN_NO_HT);
+ break;
+ }
+
+ if (wide_bw_chansw_ie) {
+ new_vht_chandef.chan = new_chan;
+ new_vht_chandef.center_freq1 =
+ ieee80211_channel_to_frequency(
+ wide_bw_chansw_ie->new_center_freq_seg0,
+ new_band);
+
+ switch (wide_bw_chansw_ie->new_channel_width) {
+ default:
+ /* hmmm, ignore VHT and use HT if present */
+ case IEEE80211_VHT_CHANWIDTH_USE_HT:
+ new_vht_chandef.chan = NULL;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80MHZ:
+ new_vht_chandef.width = NL80211_CHAN_WIDTH_80;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_160MHZ:
+ new_vht_chandef.width = NL80211_CHAN_WIDTH_160;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+ /* field is otherwise reserved */
+ new_vht_chandef.center_freq2 =
+ ieee80211_channel_to_frequency(
+ wide_bw_chansw_ie->new_center_freq_seg1,
+ new_band);
+ new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
+ break;
+ }
+ if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ &&
+ new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
+ chandef_downgrade(&new_vht_chandef);
+ if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ &&
+ new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
+ chandef_downgrade(&new_vht_chandef);
+ if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ &&
+ new_vht_chandef.width > NL80211_CHAN_WIDTH_20)
+ chandef_downgrade(&new_vht_chandef);
+ }
+
+ /* if VHT data is there validate & use it */
+ if (new_vht_chandef.chan) {
+ if (!cfg80211_chandef_compatible(&new_vht_chandef,
+ &new_chandef)) {
+ sdata_info(sdata,
+ "AP %pM CSA has inconsistent channel data, disconnecting\n",
+ ifmgd->associated->bssid);
+ ieee80211_queue_work(&local->hw,
+ &ifmgd->csa_connection_drop_work);
+ return;
+ }
+ new_chandef = new_vht_chandef;
+ }
+
+ if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef,
+ IEEE80211_CHAN_DISABLED)) {
+ sdata_info(sdata,
+ "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
+ ifmgd->associated->bssid, new_freq,
+ new_chandef.width, new_chandef.center_freq1,
+ new_chandef.center_freq2);
+ ieee80211_queue_work(&local->hw,
&ifmgd->csa_connection_drop_work);
return;
}
ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
- if (sdata->local->use_chanctx) {
+ if (local->use_chanctx) {
sdata_info(sdata,
"not handling channel switch with channel contexts\n");
- ieee80211_queue_work(&sdata->local->hw,
+ ieee80211_queue_work(&local->hw,
&ifmgd->csa_connection_drop_work);
return;
}
- mutex_lock(&sdata->local->chanctx_mtx);
+ mutex_lock(&local->chanctx_mtx);
if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) {
- mutex_unlock(&sdata->local->chanctx_mtx);
+ mutex_unlock(&local->chanctx_mtx);
return;
}
chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf),
@@ -1105,39 +1222,39 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (chanctx->refcount > 1) {
sdata_info(sdata,
"channel switch with multiple interfaces on the same channel, disconnecting\n");
- ieee80211_queue_work(&sdata->local->hw,
+ ieee80211_queue_work(&local->hw,
&ifmgd->csa_connection_drop_work);
- mutex_unlock(&sdata->local->chanctx_mtx);
+ mutex_unlock(&local->chanctx_mtx);
return;
}
- mutex_unlock(&sdata->local->chanctx_mtx);
+ mutex_unlock(&local->chanctx_mtx);
- sdata->local->csa_channel = new_ch;
+ local->csa_chandef = new_chandef;
- if (sw_elem->mode)
- ieee80211_stop_queues_by_reason(&sdata->local->hw,
+ if (mode)
+ ieee80211_stop_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
- if (sdata->local->ops->channel_switch) {
+ if (local->ops->channel_switch) {
/* use driver's channel switch callback */
struct ieee80211_channel_switch ch_switch = {
.timestamp = timestamp,
- .block_tx = sw_elem->mode,
- .channel = new_ch,
- .count = sw_elem->count,
+ .block_tx = mode,
+ .chandef = new_chandef,
+ .count = count,
};
- drv_channel_switch(sdata->local, &ch_switch);
+ drv_channel_switch(local, &ch_switch);
return;
}
/* channel switch handled in software */
- if (sw_elem->count <= 1)
- ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
+ if (count <= 1)
+ ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work);
else
mod_timer(&ifmgd->chswitch_timer,
- TU_TO_EXP_TIME(sw_elem->count *
- cbss->beacon_interval));
+ TU_TO_EXP_TIME(count * cbss->beacon_interval));
}
static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
@@ -1383,6 +1500,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
}
ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_PS);
}
@@ -1435,16 +1553,14 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
- netif_tx_stop_all_queues(sdata->dev);
-
- if (drv_tx_frames_pending(local))
+ if (drv_tx_frames_pending(local)) {
mod_timer(&local->dynamic_ps_timer, jiffies +
msecs_to_jiffies(
local->hw.conf.dynamic_ps_timeout));
- else {
+ } else {
ieee80211_send_nullfunc(local, sdata, 1);
/* Flush to get the tx status of nullfunc frame */
- drv_flush(local, false);
+ ieee80211_flush_queues(local, sdata);
}
}
@@ -1455,9 +1571,6 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
local->hw.conf.flags |= IEEE80211_CONF_PS;
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
}
-
- if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
- netif_tx_wake_all_queues(sdata->dev);
}
void ieee80211_dynamic_ps_timer(unsigned long data)
@@ -1563,6 +1676,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4);
params.cw_min = ecw2cw(pos[1] & 0x0f);
params.txop = get_unaligned_le16(pos + 2);
+ params.acm = acm;
params.uapsd = uapsd;
mlme_dbg(sdata,
@@ -1650,7 +1764,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
bss_conf->assoc_capability, bss->has_erp_value, bss->erp_value);
sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec(
- IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int));
+ beacon_loss_count * bss_conf->beacon_int));
sdata->u.mgd.associated = cbss;
memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
@@ -1663,18 +1777,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
ies = rcu_dereference(cbss->ies);
if (ies) {
- u8 noa[2];
int ret;
ret = cfg80211_get_p2p_attr(
ies->data, ies->len,
IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
- noa, sizeof(noa));
+ (u8 *) &bss_conf->p2p_noa_attr,
+ sizeof(bss_conf->p2p_noa_attr));
if (ret >= 2) {
- bss_conf->p2p_oppps = noa[1] & 0x80;
- bss_conf->p2p_ctwindow = noa[1] & 0x7f;
+ sdata->u.mgd.p2p_noa_index =
+ bss_conf->p2p_noa_attr.index;
bss_info_changed |= BSS_CHANGED_P2P_PS;
- sdata->u.mgd.p2p_noa_index = noa[0];
}
}
rcu_read_unlock();
@@ -1718,7 +1831,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
ieee80211_recalc_smps(sdata);
ieee80211_recalc_ps_vif(sdata);
- netif_tx_start_all_queues(sdata->dev);
netif_carrier_on(sdata->dev);
}
@@ -1741,22 +1853,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_stop_poll(sdata);
ifmgd->associated = NULL;
-
- /*
- * we need to commit the associated = NULL change because the
- * scan code uses that to determine whether this iface should
- * go to/wake up from powersave or not -- and could otherwise
- * wake the queues erroneously.
- */
- smp_mb();
-
- /*
- * Thus, we can only afterwards stop the queues -- to account
- * for the case where another CPU is finishing a scan at this
- * time -- we don't want the scan code to enable queues.
- */
-
- netif_tx_stop_all_queues(sdata->dev);
netif_carrier_off(sdata->dev);
/*
@@ -1775,7 +1871,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* flush out any pending frame (e.g. DELBA) before deauth/disassoc */
if (tx)
- drv_flush(local, false);
+ ieee80211_flush_queues(local, sdata);
/* deauthenticate/disassociate now */
if (tx || frame_buf)
@@ -1784,7 +1880,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* flush out frame */
if (tx)
- drv_flush(local, false);
+ ieee80211_flush_queues(local, sdata);
/* clear bssid only after building the needed mgmt frames */
memset(ifmgd->bssid, 0, ETH_ALEN);
@@ -1799,12 +1895,15 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
changed |= BSS_CHANGED_ASSOC;
sdata->vif.bss_conf.assoc = false;
- sdata->vif.bss_conf.p2p_ctwindow = 0;
- sdata->vif.bss_conf.p2p_oppps = false;
+ ifmgd->p2p_noa_index = -1;
+ memset(&sdata->vif.bss_conf.p2p_noa_attr, 0,
+ sizeof(sdata->vif.bss_conf.p2p_noa_attr));
- /* on the next assoc, re-program HT parameters */
+ /* on the next assoc, re-program HT/VHT parameters */
memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa));
memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask));
+ memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa));
+ memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));
sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
@@ -1830,8 +1929,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
del_timer_sync(&sdata->u.mgd.timer);
del_timer_sync(&sdata->u.mgd.chswitch_timer);
- sdata->u.mgd.timers_running = 0;
-
sdata->vif.bss_conf.dtim_period = 0;
ifmgd->flags = 0;
@@ -1956,7 +2053,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
run_again(ifmgd, ifmgd->probe_timeout);
if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
- drv_flush(sdata->local, false);
+ ieee80211_flush_queues(sdata->local, sdata);
}
static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
@@ -1980,12 +2077,15 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
goto out;
}
- if (beacon)
+ if (beacon) {
mlme_dbg_ratelimited(sdata,
- "detected beacon loss from AP - probing\n");
+ "detected beacon loss from AP (missed %d beacons) - probing\n",
+ beacon_loss_count);
- ieee80211_cqm_rssi_notify(&sdata->vif,
- NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL);
+ ieee80211_cqm_rssi_notify(&sdata->vif,
+ NL80211_CQM_RSSI_BEACON_LOSS_EVENT,
+ GFP_KERNEL);
+ }
/*
* The driver/our work has already reported this event or the
@@ -2079,6 +2179,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
true, frame_buf);
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
mutex_unlock(&ifmgd->mtx);
@@ -2130,7 +2231,6 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif)
trace_api_beacon_loss(sdata);
- WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR);
sdata->u.mgd.connection_loss = false;
ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
}
@@ -2180,7 +2280,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
u32 tx_flags = 0;
pos = mgmt->u.auth.variable;
- ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
+ ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
if (!elems.challenge)
return;
auth_data->expected_transaction = 4;
@@ -2422,8 +2522,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
u16 capab_info, aid;
struct ieee802_11_elems elems;
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ const struct cfg80211_bss_ies *bss_ies = NULL;
+ struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
u32 changed = 0;
int err;
+ bool ret;
/* AssocResp and ReassocResp have identical structure */
@@ -2445,7 +2548,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
pos = mgmt->u.assoc_resp.variable;
- ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
+ ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
if (!elems.supp_rates) {
sdata_info(sdata, "no SuppRates element in AssocResp\n");
@@ -2455,21 +2558,86 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
ifmgd->aid = aid;
/*
+ * Some APs are erroneously not including some information in their
+ * (re)association response frames. Try to recover by using the data
+ * from the beacon or probe response. This seems to afflict mobile
+ * 2G/3G/4G wifi routers, reported models include the "Onda PN51T",
+ * "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device.
+ */
+ if ((assoc_data->wmm && !elems.wmm_param) ||
+ (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+ (!elems.ht_cap_elem || !elems.ht_operation)) ||
+ (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ (!elems.vht_cap_elem || !elems.vht_operation))) {
+ const struct cfg80211_bss_ies *ies;
+ struct ieee802_11_elems bss_elems;
+
+ rcu_read_lock();
+ ies = rcu_dereference(cbss->ies);
+ if (ies)
+ bss_ies = kmemdup(ies, sizeof(*ies) + ies->len,
+ GFP_ATOMIC);
+ rcu_read_unlock();
+ if (!bss_ies)
+ return false;
+
+ ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
+ false, &bss_elems);
+ if (assoc_data->wmm &&
+ !elems.wmm_param && bss_elems.wmm_param) {
+ elems.wmm_param = bss_elems.wmm_param;
+ sdata_info(sdata,
+ "AP bug: WMM param missing from AssocResp\n");
+ }
+
+ /*
+ * Also check if we requested HT/VHT, otherwise the AP doesn't
+ * have to include the IEs in the (re)association response.
+ */
+ if (!elems.ht_cap_elem && bss_elems.ht_cap_elem &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ elems.ht_cap_elem = bss_elems.ht_cap_elem;
+ sdata_info(sdata,
+ "AP bug: HT capability missing from AssocResp\n");
+ }
+ if (!elems.ht_operation && bss_elems.ht_operation &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ elems.ht_operation = bss_elems.ht_operation;
+ sdata_info(sdata,
+ "AP bug: HT operation missing from AssocResp\n");
+ }
+ if (!elems.vht_cap_elem && bss_elems.vht_cap_elem &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+ elems.vht_cap_elem = bss_elems.vht_cap_elem;
+ sdata_info(sdata,
+ "AP bug: VHT capa missing from AssocResp\n");
+ }
+ if (!elems.vht_operation && bss_elems.vht_operation &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+ elems.vht_operation = bss_elems.vht_operation;
+ sdata_info(sdata,
+ "AP bug: VHT operation missing from AssocResp\n");
+ }
+ }
+
+ /*
* We previously checked these in the beacon/probe response, so
* they should be present here. This is just a safety net.
*/
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
(!elems.wmm_param || !elems.ht_cap_elem || !elems.ht_operation)) {
sdata_info(sdata,
- "HT AP is missing WMM params or HT capability/operation in AssocResp\n");
- return false;
+ "HT AP is missing WMM params or HT capability/operation\n");
+ ret = false;
+ goto out;
}
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
(!elems.vht_cap_elem || !elems.vht_operation)) {
sdata_info(sdata,
- "VHT AP is missing VHT capability/operation in AssocResp\n");
- return false;
+ "VHT AP is missing VHT capability/operation\n");
+ ret = false;
+ goto out;
}
mutex_lock(&sdata->local->sta_mtx);
@@ -2480,7 +2648,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
sta = sta_info_get(sdata, cbss->bssid);
if (WARN_ON(!sta)) {
mutex_unlock(&sdata->local->sta_mtx);
- return false;
+ ret = false;
+ goto out;
}
sband = local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)];
@@ -2533,7 +2702,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
sta->sta.addr);
WARN_ON(__sta_info_destroy(sta));
mutex_unlock(&sdata->local->sta_mtx);
- return false;
+ ret = false;
+ goto out;
}
mutex_unlock(&sdata->local->sta_mtx);
@@ -2573,7 +2743,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt);
ieee80211_sta_reset_beacon_monitor(sdata);
- return true;
+ ret = true;
+ out:
+ kfree(bss_ies);
+ return ret;
}
static enum rx_mgmt_action __must_check
@@ -2614,13 +2787,13 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
pos = mgmt->u.assoc_resp.variable;
- ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
+ ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
- elems.timeout_int && elems.timeout_int_len == 5 &&
- elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) {
+ elems.timeout_int &&
+ elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {
u32 tu, ms;
- tu = get_unaligned_le32(elems.timeout_int + 1);
+ tu = le32_to_cpu(elems.timeout_int->value);
ms = tu * 1024 / 1000;
sdata_info(sdata,
"%pM rejected association temporarily; comeback duration %u TU (%u ms)\n",
@@ -2669,6 +2842,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel;
bool need_ps = false;
+ lockdep_assert_held(&sdata->u.mgd.mtx);
+
if ((sdata->u.mgd.associated &&
ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) ||
(sdata->u.mgd.assoc_data &&
@@ -2683,7 +2858,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
}
}
- if (elems->ds_params && elems->ds_params_len == 1)
+ if (elems->ds_params)
freq = ieee80211_channel_to_frequency(elems->ds_params[0],
rx_status->band);
else
@@ -2699,7 +2874,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
if (bss)
ieee80211_rx_bss_put(local, bss);
- if (!sdata->u.mgd.associated)
+ if (!sdata->u.mgd.associated ||
+ !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid))
return;
if (need_ps) {
@@ -2708,10 +2884,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
mutex_unlock(&local->iflist_mtx);
}
- if (elems->ch_switch_ie &&
- memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0)
- ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie,
- bss, rx_status->mactime);
+ ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
+ elems, true);
+
}
@@ -2736,7 +2911,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
return;
ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
- &elems);
+ false, &elems);
ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
@@ -2819,7 +2994,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
ieee802_11_parse_elems(mgmt->u.beacon.variable,
- len - baselen, &elems);
+ len - baselen, false, &elems);
ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
ifmgd->assoc_data->have_beacon = true;
@@ -2929,7 +3104,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable,
- len - baselen, &elems,
+ len - baselen, false, &elems,
care_about_ies, ncrc);
if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) {
@@ -2961,22 +3136,30 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
if (sdata->vif.p2p) {
- u8 noa[2];
+ struct ieee80211_p2p_noa_attr noa = {};
int ret;
ret = cfg80211_get_p2p_attr(mgmt->u.beacon.variable,
len - baselen,
IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
- noa, sizeof(noa));
- if (ret >= 2 && sdata->u.mgd.p2p_noa_index != noa[0]) {
- bss_conf->p2p_oppps = noa[1] & 0x80;
- bss_conf->p2p_ctwindow = noa[1] & 0x7f;
+ (u8 *) &noa, sizeof(noa));
+ if (ret >= 2) {
+ if (sdata->u.mgd.p2p_noa_index != noa.index) {
+ /* valid noa_attr and index changed */
+ sdata->u.mgd.p2p_noa_index = noa.index;
+ memcpy(&bss_conf->p2p_noa_attr, &noa, sizeof(noa));
+ changed |= BSS_CHANGED_P2P_PS;
+ /*
+ * make sure we update all information, the CRC
+ * mechanism doesn't look at P2P attributes.
+ */
+ ifmgd->beacon_crc_valid = false;
+ }
+ } else if (sdata->u.mgd.p2p_noa_index != -1) {
+ /* noa_attr not found and we had valid noa_attr before */
+ sdata->u.mgd.p2p_noa_index = -1;
+ memset(&bss_conf->p2p_noa_attr, 0, sizeof(bss_conf->p2p_noa_attr));
changed |= BSS_CHANGED_P2P_PS;
- sdata->u.mgd.p2p_noa_index = noa[0];
- /*
- * make sure we update all information, the CRC
- * mechanism doesn't look at P2P attributes.
- */
ifmgd->beacon_crc_valid = false;
}
}
@@ -3018,7 +3201,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
changed |= BSS_CHANGED_DTIM_PERIOD;
}
- if (elems.erp_info && elems.erp_info_len >= 1) {
+ if (elems.erp_info) {
erp_valid = true;
erp_value = elems.erp_info[0];
} else {
@@ -3068,6 +3251,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
enum rx_mgmt_action rma = RX_MGMT_NONE;
u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
u16 fc;
+ struct ieee802_11_elems elems;
+ int ies_len;
rx_status = (struct ieee80211_rx_status *) skb->cb;
mgmt = (struct ieee80211_mgmt *) skb->data;
@@ -3097,14 +3282,48 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss);
break;
case IEEE80211_STYPE_ACTION:
- switch (mgmt->u.action.category) {
- case WLAN_CATEGORY_SPECTRUM_MGMT:
+ if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) {
+ ies_len = skb->len -
+ offsetof(struct ieee80211_mgmt,
+ u.action.u.chan_switch.variable);
+
+ if (ies_len < 0)
+ break;
+
+ ieee802_11_parse_elems(
+ mgmt->u.action.u.chan_switch.variable,
+ ies_len, true, &elems);
+
+ if (elems.parse_error)
+ break;
+
ieee80211_sta_process_chanswitch(sdata,
- &mgmt->u.action.u.chan_switch.sw_elem,
- (void *)ifmgd->associated->priv,
- rx_status->mactime);
- break;
+ rx_status->mactime,
+ &elems, false);
+ } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) {
+ ies_len = skb->len -
+ offsetof(struct ieee80211_mgmt,
+ u.action.u.ext_chan_switch.variable);
+
+ if (ies_len < 0)
+ break;
+
+ ieee802_11_parse_elems(
+ mgmt->u.action.u.ext_chan_switch.variable,
+ ies_len, true, &elems);
+
+ if (elems.parse_error)
+ break;
+
+ /* for the handling code pretend this was also an IE */
+ elems.ext_chansw_ie =
+ &mgmt->u.action.u.ext_chan_switch.data;
+
+ ieee80211_sta_process_chanswitch(sdata,
+ rx_status->mactime,
+ &elems, false);
}
+ break;
}
mutex_unlock(&ifmgd->mtx);
@@ -3140,15 +3359,8 @@ static void ieee80211_sta_timer(unsigned long data)
{
struct ieee80211_sub_if_data *sdata =
(struct ieee80211_sub_if_data *) data;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_local *local = sdata->local;
-
- if (local->quiescing) {
- set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
- return;
- }
- ieee80211_queue_work(&local->hw, &sdata->work);
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
@@ -3182,10 +3394,6 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
if (WARN_ON_ONCE(!auth_data))
return -EINVAL;
- if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
- tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
- IEEE80211_TX_INTFL_MLME_CONN_TX;
-
auth_data->tries++;
if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) {
@@ -3219,6 +3427,10 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
auth_data->expected_transaction = trans;
}
+ if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
+ IEEE80211_TX_INTFL_MLME_CONN_TX;
+
ieee80211_send_auth(sdata, trans, auth_data->algorithm, status,
auth_data->data, auth_data->data_len,
auth_data->bss->bssid,
@@ -3242,12 +3454,12 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
* will not answer to direct packet in unassociated state.
*/
ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1],
- NULL, 0, (u32) -1, true, tx_flags,
+ NULL, 0, (u32) -1, true, 0,
auth_data->bss->channel, false);
rcu_read_unlock();
}
- if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) {
+ if (tx_flags == 0) {
auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
ifmgd->auth_data->timeout_started = true;
run_again(ifmgd, auth_data->timeout);
@@ -3501,37 +3713,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
}
#ifdef CONFIG_PM
-void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-
- /*
- * Stop timers before deleting work items, as timers
- * could race and re-add the work-items. They will be
- * re-established on connection.
- */
- del_timer_sync(&ifmgd->conn_mon_timer);
- del_timer_sync(&ifmgd->bcn_mon_timer);
-
- /*
- * we need to use atomic bitops for the running bits
- * only because both timers might fire at the same
- * time -- the code here is properly synchronised.
- */
-
- cancel_work_sync(&ifmgd->request_smps_work);
-
- cancel_work_sync(&ifmgd->monitor_work);
- cancel_work_sync(&ifmgd->beacon_connection_loss_work);
- cancel_work_sync(&ifmgd->csa_connection_drop_work);
- if (del_timer_sync(&ifmgd->timer))
- set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
-
- if (del_timer_sync(&ifmgd->chswitch_timer))
- set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running);
- cancel_work_sync(&ifmgd->chswitch_work);
-}
-
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -3553,16 +3734,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
return;
}
mutex_unlock(&ifmgd->mtx);
-
- if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running))
- add_timer(&ifmgd->timer);
- if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running))
- add_timer(&ifmgd->chswitch_timer);
- ieee80211_sta_reset_beacon_monitor(sdata);
-
- mutex_lock(&sdata->local->mtx);
- ieee80211_restart_sta_timer(sdata);
- mutex_unlock(&sdata->local->mtx);
}
#endif
@@ -3590,8 +3761,9 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
ifmgd->flags = 0;
ifmgd->powersave = sdata->wdev.ps;
- ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
- ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
+ ifmgd->uapsd_queues = sdata->local->hw.uapsd_queues;
+ ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len;
+ ifmgd->p2p_noa_index = -1;
mutex_init(&ifmgd->mtx);
@@ -4089,6 +4261,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
}
+ if (req->flags & ASSOC_REQ_DISABLE_VHT)
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
/* Also disable HT if we don't support it or the AP doesn't use WMM */
sband = local->hw.wiphy->bands[req->bss->channel->band];
if (!sband->ht_cap.ht_supported ||
@@ -4112,6 +4287,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
sizeof(ifmgd->ht_capa_mask));
+ memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa));
+ memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask,
+ sizeof(ifmgd->vht_capa_mask));
+
if (req->ie && req->ie_len) {
memcpy(assoc_data->ie, req->ie, req->ie_len);
assoc_data->ie_len = req->ie_len;
@@ -4149,7 +4328,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (bss->wmm_used && bss->uapsd_supported &&
- (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) {
+ (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) &&
+ sdata->wmm_acm != 0xff) {
assoc_data->uapsd = true;
ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
} else {
@@ -4263,7 +4443,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
bool tx = !req->local_state_change;
- bool sent_frame = false;
+ bool report_frame = false;
mutex_lock(&ifmgd->mtx);
@@ -4280,7 +4460,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_destroy_auth_data(sdata, false);
mutex_unlock(&ifmgd->mtx);
- sent_frame = tx;
+ report_frame = true;
goto out;
}
@@ -4288,12 +4468,12 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ether_addr_equal(ifmgd->associated->bssid, req->bssid)) {
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
req->reason_code, tx, frame_buf);
- sent_frame = tx;
+ report_frame = true;
}
mutex_unlock(&ifmgd->mtx);
out:
- if (sent_frame)
+ if (report_frame)
__cfg80211_send_deauth(sdata->dev, frame_buf,
IEEE80211_DEAUTH_FRAME_LEN);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 430bd254e496..acd1f71adc03 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -118,9 +118,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
* Stop queues and transmit all frames queued by the driver
* before sending nullfunc to enable powersave at the AP.
*/
- ieee80211_stop_queues_by_reason(&local->hw,
+ ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
@@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
}
mutex_unlock(&local->iflist_mtx);
- ieee80211_wake_queues_by_reason(&local->hw,
+ ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
}
@@ -277,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
duration = 10;
ret = drv_remain_on_channel(local, roc->sdata, roc->chan,
- duration);
+ duration, roc->type);
roc->started = true;
@@ -382,7 +382,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)
ieee80211_roc_notify_destroy(roc, !roc->abort);
if (started) {
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
local->tmp_channel = NULL;
ieee80211_hw_config(local, 0);
@@ -445,15 +445,15 @@ void ieee80211_roc_setup(struct ieee80211_local *local)
INIT_LIST_HEAD(&local->roc_list);
}
-void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata)
+void ieee80211_roc_purge(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
{
- struct ieee80211_local *local = sdata->local;
struct ieee80211_roc_work *roc, *tmp;
LIST_HEAD(tmp_list);
mutex_lock(&local->mtx);
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
- if (roc->sdata != sdata)
+ if (sdata && roc->sdata != sdata)
continue;
if (roc->started && local->ops->remain_on_channel) {
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d0275f34bf70..7fc5d0d8149a 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -6,32 +6,11 @@
#include "driver-ops.h"
#include "led.h"
-/* return value indicates whether the driver should be further notified */
-static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata)
-{
- switch (sdata->vif.type) {
- case NL80211_IFTYPE_STATION:
- ieee80211_sta_quiesce(sdata);
- break;
- case NL80211_IFTYPE_ADHOC:
- ieee80211_ibss_quiesce(sdata);
- break;
- case NL80211_IFTYPE_MESH_POINT:
- ieee80211_mesh_quiesce(sdata);
- break;
- default:
- break;
- }
-
- cancel_work_sync(&sdata->work);
-}
-
int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
struct sta_info *sta;
- struct ieee80211_chanctx *ctx;
if (!local->open_count)
goto suspend;
@@ -40,6 +19,10 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
ieee80211_dfs_cac_cancel(local);
+ ieee80211_roc_purge(local, NULL);
+
+ ieee80211_del_virtual_monitor(local);
+
if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
@@ -51,12 +34,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
}
ieee80211_stop_queues_by_reason(hw,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND);
- /* flush out all packets */
+ /* flush out all packets and station cleanup call_rcu()s */
synchronize_net();
+ rcu_barrier();
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
local->quiescing = true;
/* make quiescing visible to timers everywhere */
@@ -89,23 +74,17 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
mutex_unlock(&local->sta_mtx);
}
ieee80211_wake_queues_by_reason(hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_SUSPEND);
return err;
} else if (err > 0) {
WARN_ON(err != 1);
- local->wowlan = false;
+ return err;
} else {
- list_for_each_entry(sdata, &local->interfaces, list)
- if (ieee80211_sdata_running(sdata))
- ieee80211_quiesce(sdata);
goto suspend;
}
}
- /* disable keys */
- list_for_each_entry(sdata, &local->interfaces, list)
- ieee80211_disable_keys(sdata);
-
/* tear down aggregation sessions and remove STAs */
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
@@ -117,100 +96,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
WARN_ON(drv_sta_state(local, sta->sdata, sta,
state, state - 1));
}
-
- mesh_plink_quiesce(sta);
}
mutex_unlock(&local->sta_mtx);
/* remove all interfaces */
list_for_each_entry(sdata, &local->interfaces, list) {
- static u8 zero_addr[ETH_ALEN] = {};
- u32 changed = 0;
-
if (!ieee80211_sdata_running(sdata))
continue;
-
- switch (sdata->vif.type) {
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_MONITOR:
- /* skip these */
- continue;
- case NL80211_IFTYPE_STATION:
- if (sdata->vif.bss_conf.assoc)
- changed = BSS_CHANGED_ASSOC |
- BSS_CHANGED_BSSID |
- BSS_CHANGED_IDLE;
- break;
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_ADHOC:
- case NL80211_IFTYPE_MESH_POINT:
- if (sdata->vif.bss_conf.enable_beacon)
- changed = BSS_CHANGED_BEACON_ENABLED;
- break;
- default:
- break;
- }
-
- ieee80211_quiesce(sdata);
-
- sdata->suspend_bss_conf = sdata->vif.bss_conf;
- memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf));
- sdata->vif.bss_conf.idle = true;
- if (sdata->suspend_bss_conf.bssid)
- sdata->vif.bss_conf.bssid = zero_addr;
-
- /* disable beaconing or remove association */
- ieee80211_bss_info_change_notify(sdata, changed);
-
- if (sdata->vif.type == NL80211_IFTYPE_AP &&
- rcu_access_pointer(sdata->u.ap.beacon))
- drv_stop_ap(local, sdata);
-
- if (local->use_chanctx) {
- struct ieee80211_chanctx_conf *conf;
-
- mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(
- sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (conf) {
- ctx = container_of(conf,
- struct ieee80211_chanctx,
- conf);
- drv_unassign_vif_chanctx(local, sdata, ctx);
- }
-
- mutex_unlock(&local->chanctx_mtx);
- }
- drv_remove_interface(local, sdata);
- }
-
- sdata = rtnl_dereference(local->monitor_sdata);
- if (sdata) {
- if (local->use_chanctx) {
- struct ieee80211_chanctx_conf *conf;
-
- mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(
- sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (conf) {
- ctx = container_of(conf,
- struct ieee80211_chanctx,
- conf);
- drv_unassign_vif_chanctx(local, sdata, ctx);
- }
-
- mutex_unlock(&local->chanctx_mtx);
- }
-
drv_remove_interface(local, sdata);
}
- mutex_lock(&local->chanctx_mtx);
- list_for_each_entry(ctx, &local->chanctx_list, list)
- drv_remove_chanctx(local, ctx);
- mutex_unlock(&local->chanctx_mtx);
+ /*
+ * We disconnected on all interfaces before suspend, all channel
+ * contexts should be released.
+ */
+ WARN_ON(!list_empty(&local->chanctx_list));
/* stop hardware - this must stop RX */
if (local->open_count)
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index dd88381c53b7..a02bef35b134 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -252,6 +252,25 @@ rate_lowest_non_cck_index(struct ieee80211_supported_band *sband,
return 0;
}
+/*
+ * Fill info->control.rates[0] with the lowest-rate fallback.
+ *
+ * The index comes from rate_lowest_index(), or from
+ * rate_lowest_non_cck_index() when the band is 2 GHz and
+ * IEEE80211_TX_CTL_NO_CCK_RATE is set in info->flags. The try count is 1
+ * for IEEE80211_TX_CTL_NO_ACK frames and hw->max_rate_tries otherwise.
+ */
+static void __rate_control_send_low(struct ieee80211_hw *hw,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_sta *sta,
+ struct ieee80211_tx_info *info)
+{
+ if ((sband->band != IEEE80211_BAND_2GHZ) ||
+ !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
+ info->control.rates[0].idx = rate_lowest_index(sband, sta);
+ else
+ info->control.rates[0].idx =
+ rate_lowest_non_cck_index(sband, sta);
+
+ info->control.rates[0].count =
+ (info->flags & IEEE80211_TX_CTL_NO_ACK) ?
+ 1 : hw->max_rate_tries;
+
+ /* rate_control_fill_sta_table() skips sta->rates when this is set */
+ info->control.skip_table = 1;
+}
+
bool rate_control_send_low(struct ieee80211_sta *sta,
void *priv_sta,
@@ -262,16 +281,8 @@ bool rate_control_send_low(struct ieee80211_sta *sta,
int mcast_rate;
if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
- if ((sband->band != IEEE80211_BAND_2GHZ) ||
- !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
- info->control.rates[0].idx =
- rate_lowest_index(txrc->sband, sta);
- else
- info->control.rates[0].idx =
- rate_lowest_non_cck_index(txrc->sband, sta);
- info->control.rates[0].count =
- (info->flags & IEEE80211_TX_CTL_NO_ACK) ?
- 1 : txrc->hw->max_rate_tries;
+ __rate_control_send_low(txrc->hw, sband, sta, info);
+
if (!sta && txrc->bss) {
mcast_rate = txrc->bss_conf->mcast_rate[sband->band];
if (mcast_rate > 0) {
@@ -355,7 +366,8 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate,
static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
- struct ieee80211_tx_rate_control *txrc,
+ struct ieee80211_supported_band *sband,
+ enum nl80211_chan_width chan_width,
u32 mask,
u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN])
{
@@ -375,27 +387,17 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
alt_rate.count = rate->count;
if (rate_idx_match_legacy_mask(&alt_rate,
- txrc->sband->n_bitrates,
- mask)) {
+ sband->n_bitrates, mask)) {
*rate = alt_rate;
return;
}
} else {
- struct sk_buff *skb = txrc->skb;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- __le16 fc;
-
/* handle legacy rates */
- if (rate_idx_match_legacy_mask(rate, txrc->sband->n_bitrates,
- mask))
+ if (rate_idx_match_legacy_mask(rate, sband->n_bitrates, mask))
return;
/* if HT BSS, and we handle a data frame, also try HT rates */
- if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
- return;
-
- fc = hdr->frame_control;
- if (!ieee80211_is_data(fc))
+ if (chan_width == NL80211_CHAN_WIDTH_20_NOHT)
return;
alt_rate.idx = 0;
@@ -408,7 +410,7 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
alt_rate.flags |= IEEE80211_TX_RC_MCS;
- if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_40)
+ if (chan_width == NL80211_CHAN_WIDTH_40)
alt_rate.flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
if (rate_idx_match_mcs_mask(&alt_rate, mcs_mask)) {
@@ -426,6 +428,228 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
*/
}
+/*
+ * Post-process a rate list in place.
+ *
+ * For a legacy (non-MCS) first rate, select the RTS/CTS rate index and
+ * store it in info->control.rts_cts_rate_idx. Then walk the list:
+ * invalidate every entry that follows an invalid one, and set the
+ * RTS/CTS, CTS-protection and short-preamble flags on the valid entries.
+ *
+ * @vif: interface whose bss_conf.basic_rates bitmap is consulted
+ * @sband: band whose bitrates[] table the legacy rate indices refer to
+ * @info: TX info; control.use_rts, control.use_cts_prot and
+ *        control.short_preamble are read, control.rts_cts_rate_idx is
+ *        written, and control.use_cts_prot is cleared once RTS is applied
+ * @rates: rate entries to fix up
+ * @max_rates: number of entries in @rates
+ */
+static void rate_fixup_ratelist(struct ieee80211_vif *vif,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_tx_info *info,
+ struct ieee80211_tx_rate *rates,
+ int max_rates)
+{
+ struct ieee80211_rate *rate;
+ bool inval = false;
+ int i;
+
+ /*
+ * Set up the RTS/CTS rate as the fastest basic rate
+ * that is not faster than the data rate unless there
+ * is no basic rate slower than the data rate, in which
+ * case we pick the slowest basic rate
+ *
+ * XXX: Should this check all retry rates?
+ */
+ if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) {
+ u32 basic_rates = vif->bss_conf.basic_rates;
+ /*
+ * Seed with the lowest-indexed basic rate. ffs() is 1-based,
+ * so the bit index is ffs(x) - 1; ffs(x - 1) would yield an
+ * index that is not a basic rate for most bitmaps.
+ */
+ s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0;
+
+ rate = &sband->bitrates[rates[0].idx];
+
+ for (i = 0; i < sband->n_bitrates; i++) {
+ /* must be a basic rate */
+ if (!(basic_rates & BIT(i)))
+ continue;
+ /* must not be faster than the data rate */
+ if (sband->bitrates[i].bitrate > rate->bitrate)
+ continue;
+ /* maximum */
+ if (sband->bitrates[baserate].bitrate <
+ sband->bitrates[i].bitrate)
+ baserate = i;
+ }
+
+ info->control.rts_cts_rate_idx = baserate;
+ }
+
+ for (i = 0; i < max_rates; i++) {
+ /*
+ * make sure there's no valid rate following
+ * an invalid one, just in case drivers don't
+ * take the API seriously to stop at -1.
+ */
+ if (inval) {
+ rates[i].idx = -1;
+ continue;
+ }
+ if (rates[i].idx < 0) {
+ inval = true;
+ continue;
+ }
+
+ /*
+ * For now assume MCS is already set up correctly, this
+ * needs to be fixed.
+ */
+ if (rates[i].flags & IEEE80211_TX_RC_MCS) {
+ WARN_ON(rates[i].idx > 76);
+
+ if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) &&
+ info->control.use_cts_prot)
+ rates[i].flags |=
+ IEEE80211_TX_RC_USE_CTS_PROTECT;
+ continue;
+ }
+
+ if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) {
+ WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9);
+ continue;
+ }
+
+ /* set up RTS protection if desired */
+ if (info->control.use_rts) {
+ rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS;
+ info->control.use_cts_prot = false;
+ }
+
+ /* RC is busted */
+ if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) {
+ rates[i].idx = -1;
+ continue;
+ }
+
+ rate = &sband->bitrates[rates[i].idx];
+
+ /* set up short preamble */
+ if (info->control.short_preamble &&
+ rate->flags & IEEE80211_RATE_SHORT_PREAMBLE)
+ rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
+
+ /* set up G protection */
+ if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) &&
+ info->control.use_cts_prot &&
+ rate->flags & IEEE80211_RATE_ERP_G)
+ rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT;
+ }
+}
+
+
+/*
+ * Populate @rates, slot by slot, from two sources.
+ *
+ * A slot that already holds a usable entry in info->control.rates
+ * (idx >= 0 and non-zero count) is taken from there. Otherwise it is
+ * taken from the station's rate table, if one is available and
+ * info->control.skip_table is not set. Filling stops at the first slot
+ * that ends up invalid.
+ *
+ * @sta: station whose rate table may be used; may be NULL
+ * @info: TX info supplying the pre-set rates and the use_rts /
+ *        use_cts_prot selectors
+ * @rates: destination array; may be info->control.rates itself
+ * @max_rates: number of slots in @rates, capped at
+ *             IEEE80211_TX_RATE_TABLE_SIZE
+ */
+static void rate_control_fill_sta_table(struct ieee80211_sta *sta,
+ struct ieee80211_tx_info *info,
+ struct ieee80211_tx_rate *rates,
+ int max_rates)
+{
+ struct ieee80211_sta_rates *ratetbl = NULL;
+ int i;
+
+ /* NOTE(review): presumably the caller holds rcu_read_lock() - confirm */
+ if (sta && !info->control.skip_table)
+ ratetbl = rcu_dereference(sta->rates);
+
+ /* Fill remaining rate slots with data from the sta rate table. */
+ max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE);
+ for (i = 0; i < max_rates; i++) {
+ if (i < ARRAY_SIZE(info->control.rates) &&
+ info->control.rates[i].idx >= 0 &&
+ info->control.rates[i].count) {
+ /* no copy needed when filling info->control.rates in place */
+ if (rates != info->control.rates)
+ rates[i] = info->control.rates[i];
+ } else if (ratetbl) {
+ rates[i].idx = ratetbl->rate[i].idx;
+ rates[i].flags = ratetbl->rate[i].flags;
+ /* the table carries a separate try count per protection mode */
+ if (info->control.use_rts)
+ rates[i].count = ratetbl->rate[i].count_rts;
+ else if (info->control.use_cts_prot)
+ rates[i].count = ratetbl->rate[i].count_cts;
+ else
+ rates[i].count = ratetbl->rate[i].count;
+ } else {
+ rates[i].idx = -1;
+ rates[i].count = 0;
+ }
+
+ /* first invalid or zero-count entry terminates the list */
+ if (rates[i].idx < 0 || !rates[i].count)
+ break;
+ }
+}
+
+/*
+ * Constrain the entries of @rates to the per-band rate masks configured
+ * on @sdata.
+ *
+ * Returns without touching @rates when the legacy mask allows every
+ * bitrate of @sband and no MCS mask is configured. Otherwise each valid
+ * entry is passed to rate_idx_match_mask(). When @sta is given, both masks
+ * are first narrowed to what the station advertises.
+ *
+ * @sdata: interface providing rc_rateidx_mask, rc_has_mcs_mask,
+ *         rc_rateidx_mcs_mask and the channel width
+ * @sta: station used to narrow the masks; may be NULL
+ * @sband: band the legacy rate indices refer to
+ * @info: TX info; only info->band is read here
+ * @rates: rate entries to adjust in place
+ * @max_rates: number of entries in @rates
+ */
+static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_tx_info *info,
+ struct ieee80211_tx_rate *rates,
+ int max_rates)
+{
+ enum nl80211_chan_width chan_width;
+ u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
+ bool has_mcs_mask;
+ u32 mask;
+ int i;
+
+ /*
+ * Try to enforce the rateidx mask the user wanted. skip this if the
+ * default mask (allow all rates) is used to save some processing for
+ * the common case.
+ */
+ mask = sdata->rc_rateidx_mask[info->band];
+ has_mcs_mask = sdata->rc_has_mcs_mask[info->band];
+ if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask)
+ return;
+
+ /* without a configured MCS mask, start from "all MCS allowed" */
+ if (has_mcs_mask)
+ memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band],
+ sizeof(mcs_mask));
+ else
+ memset(mcs_mask, 0xff, sizeof(mcs_mask));
+
+ if (sta) {
+ /* Filter out rates that the STA does not support */
+ mask &= sta->supp_rates[info->band];
+ for (i = 0; i < sizeof(mcs_mask); i++)
+ mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
+ }
+
+ /*
+ * Make sure the rate index selected for each TX rate is
+ * included in the configured mask and change the rate indexes
+ * if needed.
+ */
+ chan_width = sdata->vif.bss_conf.chandef.width;
+ for (i = 0; i < max_rates; i++) {
+ /* Skip invalid rates */
+ if (rates[i].idx < 0)
+ break;
+
+ rate_idx_match_mask(&rates[i], sband, chan_width, mask,
+ mcs_mask);
+ }
+}
+
+/*
+ * Build the final rate list for @skb in @dest.
+ *
+ * Steps, in order:
+ *  1. fill @dest from the rates in the skb's TX info and the station
+ *     rate table (rate_control_fill_sta_table());
+ *  2. stop here if @vif is NULL;
+ *  3. for data frames, apply the configured rate masks;
+ *  4. if dest[0] is invalid, call __rate_control_send_low(), which
+ *     writes the lowest-rate fallback into the skb's
+ *     info->control.rates[0];
+ *  5. if @sta is set, fix up protection/preamble flags and the RTS/CTS
+ *     rate (rate_fixup_ratelist()).
+ *
+ * @vif: interface the frame is sent on; may be NULL
+ * @sta: destination station; may be NULL
+ * @skb: frame whose header and TX info are consulted
+ * @dest: output array of rate entries
+ * @max_rates: number of entries in @dest
+ */
+void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct sk_buff *skb,
+ struct ieee80211_tx_rate *dest,
+ int max_rates)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_supported_band *sband;
+
+ rate_control_fill_sta_table(sta, info, dest, max_rates);
+
+ if (!vif)
+ return;
+
+ sdata = vif_to_sdata(vif);
+ sband = sdata->local->hw.wiphy->bands[info->band];
+
+ /* the rate masks are only applied to data frames */
+ if (ieee80211_is_data(hdr->frame_control))
+ rate_control_apply_mask(sdata, sta, sband, info, dest, max_rates);
+
+ /* NOTE(review): the fallback is written to info->control.rates[0], which is dest[0] only when dest aliases info->control.rates - confirm this is intended for other callers */
+ if (dest[0].idx < 0)
+ __rate_control_send_low(&sdata->local->hw, sband, sta, info);
+
+ if (sta)
+ rate_fixup_ratelist(vif, sband, info, dest, max_rates);
+}
+EXPORT_SYMBOL(ieee80211_get_tx_rates);
+
void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
struct ieee80211_tx_rate_control *txrc)
@@ -435,8 +659,6 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta *ista = NULL;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
int i;
- u32 mask;
- u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) {
ista = &sta->sta;
@@ -454,37 +676,34 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+ if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE)
+ return;
+
+ ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb,
+ info->control.rates,
+ ARRAY_SIZE(info->control.rates));
+}
+
+int rate_control_set_rates(struct ieee80211_hw *hw,
+ struct ieee80211_sta *pubsta,
+ struct ieee80211_sta_rates *rates)
+{
+ struct ieee80211_sta_rates *old;
+
/*
- * Try to enforce the rateidx mask the user wanted. skip this if the
- * default mask (allow all rates) is used to save some processing for
- * the common case.
+ * mac80211 guarantees that this function will not be called
+ * concurrently, so the following RCU access is safe, even without
+ * extra locking. This can not be checked easily, so we just set
+ * the condition to true.
*/
- mask = sdata->rc_rateidx_mask[info->band];
- memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band],
- sizeof(mcs_mask));
- if (mask != (1 << txrc->sband->n_bitrates) - 1) {
- if (sta) {
- /* Filter out rates that the STA does not support */
- mask &= sta->sta.supp_rates[info->band];
- for (i = 0; i < sizeof(mcs_mask); i++)
- mcs_mask[i] &= sta->sta.ht_cap.mcs.rx_mask[i];
- }
- /*
- * Make sure the rate index selected for each TX rate is
- * included in the configured mask and change the rate indexes
- * if needed.
- */
- for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
- /* Skip invalid rates */
- if (info->control.rates[i].idx < 0)
- break;
- rate_idx_match_mask(&info->control.rates[i], txrc,
- mask, mcs_mask);
- }
- }
+ old = rcu_dereference_protected(pubsta->rates, true);
+ rcu_assign_pointer(pubsta->rates, rates);
+ if (old)
+ kfree_rcu(old, rcu_head);
- BUG_ON(info->control.rates[0].idx < 0);
+ return 0;
}
+EXPORT_SYMBOL(rate_control_set_rates);
int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
const char *name)
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index eea45a2c7c35..ac7ef5414bde 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -55,7 +55,6 @@
#include "rate.h"
#include "rc80211_minstrel.h"
-#define SAMPLE_COLUMNS 10
#define SAMPLE_TBL(_mi, _idx, _col) \
_mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col]
@@ -70,16 +69,75 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
return i;
}
+/*
+ * find & sort topmost throughput rates
+ *
+ * Insert rate index @i into @tp_list, an array of MAX_THR_RATES rate
+ * indices. Assuming @tp_list is already ordered by descending cur_tp, the
+ * new index is placed in front of the trailing entries with a strictly
+ * lower cur_tp, and the last entry drops off the end. If no entry has a
+ * lower cur_tp, the list is left unchanged.
+ */
+static inline void
+minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
+{
+ int j = MAX_THR_RATES;
+
+ /* walk back from the end to the insertion slot */
+ while (j > 0 && mi->r[i].cur_tp > mi->r[tp_list[j - 1]].cur_tp)
+ j--;
+ /* shift the tail down by one; entries are u8, so the count is in bytes */
+ if (j < MAX_THR_RATES - 1)
+ memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1));
+ if (j < MAX_THR_RATES)
+ tp_list[j] = i;
+}
+
+/*
+ * Copy minstrel rate @idx (an index into mi->r[]) into slot @offset of
+ * @ratetbl: the rate index plus the plain, CTS and RTS/CTS try counts.
+ */
+static void
+minstrel_set_rate(struct minstrel_sta_info *mi, struct ieee80211_sta_rates *ratetbl,
+ int offset, int idx)
+{
+ struct minstrel_rate *r = &mi->r[idx];
+
+ ratetbl->rate[offset].idx = r->rix;
+ ratetbl->rate[offset].count = r->adjusted_retry_count;
+ ratetbl->rate[offset].count_cts = r->retry_count_cts;
+ ratetbl->rate[offset].count_rts = r->retry_count_rtscts;
+}
+
+/*
+ * Build a fresh station rate table from the current minstrel selection
+ * and hand it to rate_control_set_rates().
+ *
+ * The table holds, in order: max_tp_rate[0]; max_tp_rate[1] if the
+ * hardware supports at least 3 rates; max_prob_rate if it supports at
+ * least 2; and finally the lowest rate with mp->max_retry tries.
+ * If the allocation fails the function returns without calling
+ * rate_control_set_rates().
+ */
+static void
+minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
+{
+ struct ieee80211_sta_rates *ratetbl;
+ int i = 0;
+
+ /* zeroed allocation: unused slots keep count == 0 */
+ ratetbl = kzalloc(sizeof(*ratetbl), GFP_ATOMIC);
+ if (!ratetbl)
+ return;
+
+ /* Start with max_tp_rate */
+ minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[0]);
+
+ if (mp->hw->max_rates >= 3) {
+ /* At least 3 tx rates supported, use max_tp_rate2 next */
+ minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[1]);
+ }
+
+ if (mp->hw->max_rates >= 2) {
+ /* At least 2 tx rates supported, use max_prob_rate next */
+ minstrel_set_rate(mi, ratetbl, i++, mi->max_prob_rate);
+ }
+
+ /* Use lowest rate last */
+ ratetbl->rate[i].idx = mi->lowest_rix;
+ ratetbl->rate[i].count = mp->max_retry;
+ ratetbl->rate[i].count_cts = mp->max_retry;
+ ratetbl->rate[i].count_rts = mp->max_retry;
+
+ /* ratetbl is passed on here and not freed locally */
+ rate_control_set_rates(mp->hw, mi->sta, ratetbl);
+}
+
static void
minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
{
- u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0;
- u32 max_prob = 0, index_max_prob = 0;
+ u8 tmp_tp_rate[MAX_THR_RATES];
+ u8 tmp_prob_rate = 0;
u32 usecs;
- u32 p;
int i;
- mi->stats_update = jiffies;
+ for (i=0; i < MAX_THR_RATES; i++)
+ tmp_tp_rate[i] = 0;
+
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
@@ -87,27 +145,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
if (!usecs)
usecs = 1000000;
- /* To avoid rounding issues, probabilities scale from 0 (0%)
- * to 18000 (100%) */
- if (mr->attempts) {
- p = (mr->success * 18000) / mr->attempts;
+ if (unlikely(mr->attempts > 0)) {
+ mr->sample_skipped = 0;
+ mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts);
mr->succ_hist += mr->success;
mr->att_hist += mr->attempts;
- mr->cur_prob = p;
- p = ((p * (100 - mp->ewma_level)) + (mr->probability *
- mp->ewma_level)) / 100;
- mr->probability = p;
- mr->cur_tp = p * (1000000 / usecs);
- }
+ mr->probability = minstrel_ewma(mr->probability,
+ mr->cur_prob,
+ EWMA_LEVEL);
+ } else
+ mr->sample_skipped++;
mr->last_success = mr->success;
mr->last_attempts = mr->attempts;
mr->success = 0;
mr->attempts = 0;
+ /* Update throughput per rate, reset thr. below 10% success */
+ if (mr->probability < MINSTREL_FRAC(10, 100))
+ mr->cur_tp = 0;
+ else
+ mr->cur_tp = mr->probability * (1000000 / usecs);
+
/* Sample less often below the 10% chance of success.
* Sample less often above the 95% chance of success. */
- if ((mr->probability > 17100) || (mr->probability < 1800)) {
+ if (mr->probability > MINSTREL_FRAC(95, 100) ||
+ mr->probability < MINSTREL_FRAC(10, 100)) {
mr->adjusted_retry_count = mr->retry_count >> 1;
if (mr->adjusted_retry_count > 2)
mr->adjusted_retry_count = 2;
@@ -118,35 +181,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
}
if (!mr->adjusted_retry_count)
mr->adjusted_retry_count = 2;
- }
- for (i = 0; i < mi->n_rates; i++) {
- struct minstrel_rate *mr = &mi->r[i];
- if (max_tp < mr->cur_tp) {
- index_max_tp = i;
- max_tp = mr->cur_tp;
- }
- if (max_prob < mr->probability) {
- index_max_prob = i;
- max_prob = mr->probability;
+ minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate);
+
+ /* To determine the most robust rate (max_prob_rate) used at
+ * 3rd mrr stage we distinguish between two cases:
+ * (1) if any success probability >= 95%, out of those rates
+ * choose the maximum throughput rate as max_prob_rate
+ * (2) if all success probabilities < 95%, the rate with
+ * highest success probability is chosen as max_prob_rate */
+ if (mr->probability >= MINSTREL_FRAC(95,100)) {
+ if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp)
+ tmp_prob_rate = i;
+ } else {
+ if (mr->probability >= mi->r[tmp_prob_rate].probability)
+ tmp_prob_rate = i;
}
}
- max_tp = 0;
- for (i = 0; i < mi->n_rates; i++) {
- struct minstrel_rate *mr = &mi->r[i];
+ /* Assign the new rate set */
+ memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate));
+ mi->max_prob_rate = tmp_prob_rate;
- if (i == index_max_tp)
- continue;
+ /* Reset update timer */
+ mi->stats_update = jiffies;
- if (max_tp < mr->cur_tp) {
- index_max_tp2 = i;
- max_tp = mr->cur_tp;
- }
- }
- mi->max_tp_rate = index_max_tp;
- mi->max_tp_rate2 = index_max_tp2;
- mi->max_prob_rate = index_max_prob;
+ minstrel_update_rates(mp, mi);
}
static void
@@ -195,9 +255,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr,
{
unsigned int retry = mr->adjusted_retry_count;
- if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
+ if (info->control.use_rts)
retry = max(2U, min(mr->retry_count_rtscts, retry));
- else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
+ else if (info->control.use_cts_prot)
retry = max(2U, min(mr->retry_count_cts, retry));
return retry;
}
@@ -207,10 +267,10 @@ static int
minstrel_get_next_sample(struct minstrel_sta_info *mi)
{
unsigned int sample_ndx;
- sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column);
- mi->sample_idx++;
- if ((int) mi->sample_idx > (mi->n_rates - 2)) {
- mi->sample_idx = 0;
+ sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column);
+ mi->sample_row++;
+ if ((int) mi->sample_row >= mi->n_rates) {
+ mi->sample_row = 0;
mi->sample_column++;
if (mi->sample_column >= SAMPLE_COLUMNS)
mi->sample_column = 0;
@@ -226,111 +286,96 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct minstrel_sta_info *mi = priv_sta;
struct minstrel_priv *mp = priv;
- struct ieee80211_tx_rate *ar = info->control.rates;
- unsigned int ndx, sample_ndx = 0;
- bool mrr;
- bool sample_slower = false;
- bool sample = false;
- int i, delta;
- int mrr_ndx[3];
- int sample_rate;
-
+ struct ieee80211_tx_rate *rate = &info->control.rates[0];
+ struct minstrel_rate *msr, *mr;
+ unsigned int ndx;
+ bool mrr_capable;
+ bool prev_sample = mi->prev_sample;
+ int delta;
+ int sampling_ratio;
+
+ /* management/no-ack frames do not use rate control */
if (rate_control_send_low(sta, priv_sta, txrc))
return;
- mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot;
-
- ndx = mi->max_tp_rate;
-
- if (mrr)
- sample_rate = mp->lookaround_rate_mrr;
+ /* check multi-rate-retry capabilities & adjust lookaround_rate */
+ mrr_capable = mp->has_mrr &&
+ !txrc->rts &&
+ !txrc->bss_conf->use_cts_prot;
+ if (mrr_capable)
+ sampling_ratio = mp->lookaround_rate_mrr;
else
- sample_rate = mp->lookaround_rate;
+ sampling_ratio = mp->lookaround_rate;
+ /* increase sum packet counter */
mi->packet_count++;
- delta = (mi->packet_count * sample_rate / 100) -
+
+ delta = (mi->packet_count * sampling_ratio / 100) -
(mi->sample_count + mi->sample_deferred / 2);
- /* delta > 0: sampling required */
- if ((delta > 0) && (mrr || !mi->prev_sample)) {
- struct minstrel_rate *msr;
- if (mi->packet_count >= 10000) {
- mi->sample_deferred = 0;
- mi->sample_count = 0;
- mi->packet_count = 0;
- } else if (delta > mi->n_rates * 2) {
- /* With multi-rate retry, not every planned sample
- * attempt actually gets used, due to the way the retry
- * chain is set up - [max_tp,sample,prob,lowest] for
- * sample_rate < max_tp.
- *
- * If there's too much sampling backlog and the link
- * starts getting worse, minstrel would start bursting
- * out lots of sampling frames, which would result
- * in a large throughput loss. */
- mi->sample_count += (delta - mi->n_rates * 2);
- }
+ /* delta < 0: no sampling required */
+ mi->prev_sample = false;
+ if (delta < 0 || (!mrr_capable && prev_sample))
+ return;
- sample_ndx = minstrel_get_next_sample(mi);
- msr = &mi->r[sample_ndx];
- sample = true;
- sample_slower = mrr && (msr->perfect_tx_time >
- mi->r[ndx].perfect_tx_time);
-
- if (!sample_slower) {
- if (msr->sample_limit != 0) {
- ndx = sample_ndx;
- mi->sample_count++;
- if (msr->sample_limit > 0)
- msr->sample_limit--;
- } else {
- sample = false;
- }
- } else {
- /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
- * packets that have the sampling rate deferred to the
- * second MRR stage. Increase the sample counter only
- * if the deferred sample rate was actually used.
- * Use the sample_deferred counter to make sure that
- * the sampling is not done in large bursts */
- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
- mi->sample_deferred++;
- }
+ if (mi->packet_count >= 10000) {
+ mi->sample_deferred = 0;
+ mi->sample_count = 0;
+ mi->packet_count = 0;
+ } else if (delta > mi->n_rates * 2) {
+ /* With multi-rate retry, not every planned sample
+ * attempt actually gets used, due to the way the retry
+ * chain is set up - [max_tp,sample,prob,lowest] for
+ * sample_rate < max_tp.
+ *
+ * If there's too much sampling backlog and the link
+ * starts getting worse, minstrel would start bursting
+ * out lots of sampling frames, which would result
+ * in a large throughput loss. */
+ mi->sample_count += (delta - mi->n_rates * 2);
+ }
+
+ /* get next random rate sample */
+ ndx = minstrel_get_next_sample(mi);
+ msr = &mi->r[ndx];
+ mr = &mi->r[mi->max_tp_rate[0]];
+
+ /* Decide if direct (1st mrr stage) or indirect (2nd mrr stage)
+ * rate sampling method should be used.
+ * Respect such rates that are not sampled for 20 iterations.
+ */
+ if (mrr_capable &&
+ msr->perfect_tx_time > mr->perfect_tx_time &&
+ msr->sample_skipped < 20) {
+ /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
+ * packets that have the sampling rate deferred to the
+ * second MRR stage. Increase the sample counter only
+ * if the deferred sample rate was actually used.
+ * Use the sample_deferred counter to make sure that
+ * the sampling is not done in large bursts */
+ info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+ rate++;
+ mi->sample_deferred++;
+ } else {
+ if (!msr->sample_limit != 0)
+ return;
+
+ mi->sample_count++;
+ if (msr->sample_limit > 0)
+ msr->sample_limit--;
}
- mi->prev_sample = sample;
/* If we're not using MRR and the sampling rate already
* has a probability of >95%, we shouldn't be attempting
* to use it, as this only wastes precious airtime */
- if (!mrr && sample && (mi->r[ndx].probability > 17100))
- ndx = mi->max_tp_rate;
-
- ar[0].idx = mi->r[ndx].rix;
- ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info);
-
- if (!mrr) {
- if (!sample)
- ar[0].count = mp->max_retry;
- ar[1].idx = mi->lowest_rix;
- ar[1].count = mp->max_retry;
+ if (!mrr_capable &&
+ (mi->r[ndx].probability > MINSTREL_FRAC(95, 100)))
return;
- }
- /* MRR setup */
- if (sample) {
- if (sample_slower)
- mrr_ndx[0] = sample_ndx;
- else
- mrr_ndx[0] = mi->max_tp_rate;
- } else {
- mrr_ndx[0] = mi->max_tp_rate2;
- }
- mrr_ndx[1] = mi->max_prob_rate;
- mrr_ndx[2] = 0;
- for (i = 1; i < 4; i++) {
- ar[i].idx = mi->r[mrr_ndx[i - 1]].rix;
- ar[i].count = mi->r[mrr_ndx[i - 1]].adjusted_retry_count;
- }
+ mi->prev_sample = true;
+
+ rate->idx = mi->r[ndx].rix;
+ rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
}
@@ -351,26 +396,21 @@ static void
init_sample_table(struct minstrel_sta_info *mi)
{
unsigned int i, col, new_idx;
- unsigned int n_srates = mi->n_rates - 1;
u8 rnd[8];
mi->sample_column = 0;
- mi->sample_idx = 0;
- memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates);
+ mi->sample_row = 0;
+ memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates);
for (col = 0; col < SAMPLE_COLUMNS; col++) {
- for (i = 0; i < n_srates; i++) {
+ for (i = 0; i < mi->n_rates; i++) {
get_random_bytes(rnd, sizeof(rnd));
- new_idx = (i + rnd[i & 7]) % n_srates;
+ new_idx = (i + rnd[i & 7]) % mi->n_rates;
- while (SAMPLE_TBL(mi, new_idx, col) != 0)
- new_idx = (new_idx + 1) % n_srates;
+ while (SAMPLE_TBL(mi, new_idx, col) != 0xff)
+ new_idx = (new_idx + 1) % mi->n_rates;
- /* Don't sample the slowest rate (i.e. slowest base
- * rate). We must presume that the slowest rate works
- * fine, or else other management frames will also be
- * failing and the link will break */
- SAMPLE_TBL(mi, new_idx, col) = i + 1;
+ SAMPLE_TBL(mi, new_idx, col) = i;
}
}
}
@@ -385,12 +425,16 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
unsigned int i, n = 0;
unsigned int t_slot = 9; /* FIXME: get real slot time */
+ mi->sta = sta;
mi->lowest_rix = rate_lowest_index(sband, sta);
ctl_rate = &sband->bitrates[mi->lowest_rix];
mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10,
ctl_rate->bitrate,
!!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1);
+ memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate));
+ mi->max_prob_rate = 0;
+
for (i = 0; i < sband->n_bitrates; i++) {
struct minstrel_rate *mr = &mi->r[n];
unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
@@ -433,6 +477,8 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
} while ((tx_time < mp->segment_size) &&
(++mr->retry_count < mp->max_retry));
mr->adjusted_retry_count = mr->retry_count;
+ if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G))
+ mr->retry_count_cts = mr->retry_count;
}
for (i = n; i < sband->n_bitrates; i++) {
@@ -444,6 +490,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
mi->stats_update = jiffies;
init_sample_table(mi);
+ minstrel_update_rates(mp, mi);
}
static void *
@@ -542,9 +589,6 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
mp->lookaround_rate = 5;
mp->lookaround_rate_mrr = 10;
- /* moving average weight for EWMA */
- mp->ewma_level = 75;
-
/* maximum time that the hw is allowed to stay in one MRR segment */
mp->segment_size = 6000;
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 5ecf757817f2..f4301f4b2e41 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -9,6 +9,29 @@
#ifndef __RC_MINSTREL_H
#define __RC_MINSTREL_H
+#define EWMA_LEVEL 96 /* ewma weighting factor [/EWMA_DIV] */
+#define EWMA_DIV 128
+#define SAMPLE_COLUMNS 10 /* number of columns in sample table */
+
+
+/* scaled fraction values */
+#define MINSTREL_SCALE 16
+#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
+#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
+
+/* number of highest throughput rates to consider */
+#define MAX_THR_RATES 4
+
+/*
+ * Perform EWMA (Exponentially Weighted Moving Average) calculation
+ */
+static inline int
+minstrel_ewma(int old, int new, int weight)
+{
+ return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
+}
+
+
struct minstrel_rate {
int bitrate;
int rix;
@@ -26,6 +49,7 @@ struct minstrel_rate {
u32 attempts;
u32 last_attempts;
u32 last_success;
+ u8 sample_skipped;
/* parts per thousand */
u32 cur_prob;
@@ -39,20 +63,21 @@ struct minstrel_rate {
};
struct minstrel_sta_info {
+ struct ieee80211_sta *sta;
+
unsigned long stats_update;
unsigned int sp_ack_dur;
unsigned int rate_avg;
unsigned int lowest_rix;
- unsigned int max_tp_rate;
- unsigned int max_tp_rate2;
- unsigned int max_prob_rate;
+ u8 max_tp_rate[MAX_THR_RATES];
+ u8 max_prob_rate;
unsigned int packet_count;
unsigned int sample_count;
int sample_deferred;
- unsigned int sample_idx;
+ unsigned int sample_row;
unsigned int sample_column;
int n_rates;
@@ -73,7 +98,6 @@ struct minstrel_priv {
unsigned int cw_min;
unsigned int cw_max;
unsigned int max_retry;
- unsigned int ewma_level;
unsigned int segment_size;
unsigned int update_interval;
unsigned int lookaround_rate;
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index d5a56226e675..fd0b9ca1570e 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -68,23 +68,25 @@ minstrel_stats_open(struct inode *inode, struct file *file)
file->private_data = ms;
p = ms->buf;
- p += sprintf(p, "rate throughput ewma prob this prob "
+ p += sprintf(p, "rate throughput ewma prob this prob "
"this succ/attempt success attempts\n");
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
- *(p++) = (i == mi->max_tp_rate) ? 'T' : ' ';
- *(p++) = (i == mi->max_tp_rate2) ? 't' : ' ';
+ *(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' ';
+ *(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' ';
+ *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' ';
+ *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' ';
*(p++) = (i == mi->max_prob_rate) ? 'P' : ' ';
p += sprintf(p, "%3u%s", mr->bitrate / 2,
(mr->bitrate & 1 ? ".5" : " "));
- tp = mr->cur_tp / ((18000 << 10) / 96);
- prob = mr->cur_prob / 18;
- eprob = mr->probability / 18;
+ tp = MINSTREL_TRUNC(mr->cur_tp / 10);
+ prob = MINSTREL_TRUNC(mr->cur_prob * 1000);
+ eprob = MINSTREL_TRUNC(mr->probability * 1000);
p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u "
- "%3u(%3u) %8llu %8llu\n",
+ " %3u(%3u) %8llu %8llu\n",
tp / 10, tp % 10,
eprob / 10, eprob % 10,
prob / 10, prob % 10,
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3af141c69712..5b2d3012b983 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -17,8 +17,6 @@
#include "rc80211_minstrel_ht.h"
#define AVG_PKT_SIZE 1200
-#define SAMPLE_COLUMNS 10
-#define EWMA_LEVEL 75
/* Number of bits for an average sized packet */
#define MCS_NBITS (AVG_PKT_SIZE << 3)
@@ -26,11 +24,11 @@
/* Number of symbols for a packet with (bps) bits per symbol */
#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps))
-/* Transmission time for a packet containing (syms) symbols */
+/* Transmission time (nanoseconds) for a packet containing (syms) symbols */
#define MCS_SYMBOL_TIME(sgi, syms) \
(sgi ? \
- ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \
- (syms) << 2 /* syms * 4 us */ \
+ ((syms) * 18000 + 4000) / 5 : /* syms * 3.6 us */ \
+ ((syms) * 1000) << 2 /* syms * 4 us */ \
)
/* Transmit duration for the raw data part of an average sized packet */
@@ -64,9 +62,9 @@
}
#define CCK_DURATION(_bitrate, _short, _len) \
- (10 /* SIFS */ + \
+ (1000 * (10 /* SIFS */ + \
(_short ? 72 + 24 : 144 + 48 ) + \
- (8 * (_len + 4) * 10) / (_bitrate))
+ (8 * (_len + 4) * 10) / (_bitrate)))
#define CCK_ACK_DURATION(_bitrate, _short) \
(CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \
@@ -128,14 +126,8 @@ const struct mcs_group minstrel_mcs_groups[] = {
static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES];
-/*
- * Perform EWMA (Exponentially Weighted Moving Average) calculation
- */
-static int
-minstrel_ewma(int old, int new, int weight)
-{
- return (new * (100 - weight) + old * weight) / 100;
-}
+static void
+minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi);
/*
* Look up an MCS group index based on mac80211 rate information
@@ -211,20 +203,32 @@ static void
minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
{
struct minstrel_rate_stats *mr;
- unsigned int usecs = 0;
+ unsigned int nsecs = 0;
+ unsigned int tp;
+ unsigned int prob;
mr = &mi->groups[group].rates[rate];
+ prob = mr->probability;
- if (mr->probability < MINSTREL_FRAC(1, 10)) {
+ if (prob < MINSTREL_FRAC(1, 10)) {
mr->cur_tp = 0;
return;
}
+ /*
+ * For the throughput calculation, limit the probability value to 90% to
+ * account for collision related packet error rate fluctuation; the capped
+ * copy (prob), not mr->probability, must feed the formula below.
+ */
+ if (prob > MINSTREL_FRAC(9, 10))
+ prob = MINSTREL_FRAC(9, 10);
if (group != MINSTREL_CCK_GROUP)
- usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
+ nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
- usecs += minstrel_mcs_groups[group].duration[rate];
- mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability);
+ nsecs += minstrel_mcs_groups[group].duration[rate];
+ tp = 1000000 * ((prob * 1000) / nsecs);
+
+ mr->cur_tp = MINSTREL_TRUNC(tp);
}
/*
@@ -243,6 +247,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
struct minstrel_rate_stats *mr;
int cur_prob, cur_prob_tp, cur_tp, cur_tp2;
int group, i, index;
+ bool mi_rates_valid = false;
if (mi->ampdu_packets > 0) {
mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
@@ -253,11 +258,10 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
mi->sample_slow = 0;
mi->sample_count = 0;
- mi->max_tp_rate = 0;
- mi->max_tp_rate2 = 0;
- mi->max_prob_rate = 0;
for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
+ bool mg_rates_valid = false;
+
cur_prob = 0;
cur_prob_tp = 0;
cur_tp = 0;
@@ -267,15 +271,24 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
if (!mg->supported)
continue;
- mg->max_tp_rate = 0;
- mg->max_tp_rate2 = 0;
- mg->max_prob_rate = 0;
mi->sample_count++;
for (i = 0; i < MCS_GROUP_RATES; i++) {
if (!(mg->supported & BIT(i)))
continue;
+ /* initialize rates selections starting indexes */
+ if (!mg_rates_valid) {
+ mg->max_tp_rate = mg->max_tp_rate2 =
+ mg->max_prob_rate = i;
+ if (!mi_rates_valid) {
+ mi->max_tp_rate = mi->max_tp_rate2 =
+ mi->max_prob_rate = i;
+ mi_rates_valid = true;
+ }
+ mg_rates_valid = true;
+ }
+
mr = &mg->rates[i];
mr->retry_updated = false;
index = MCS_GROUP_RATES * group + i;
@@ -308,8 +321,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
}
}
- /* try to sample up to half of the available rates during each interval */
- mi->sample_count *= 4;
+ /* try to sample all available rates during each interval */
+ mi->sample_count *= 8;
cur_prob = 0;
cur_prob_tp = 0;
@@ -320,20 +333,13 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
if (!mg->supported)
continue;
- mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
- if (cur_prob_tp < mr->cur_tp &&
- minstrel_mcs_groups[group].streams == 1) {
- mi->max_prob_rate = mg->max_prob_rate;
- cur_prob = mr->cur_prob;
- cur_prob_tp = mr->cur_tp;
- }
-
mr = minstrel_get_ratestats(mi, mg->max_tp_rate);
if (cur_tp < mr->cur_tp) {
mi->max_tp_rate2 = mi->max_tp_rate;
cur_tp2 = cur_tp;
mi->max_tp_rate = mg->max_tp_rate;
cur_tp = mr->cur_tp;
+ mi->max_prob_streams = minstrel_mcs_groups[group].streams - 1;
}
mr = minstrel_get_ratestats(mi, mg->max_tp_rate2);
@@ -343,6 +349,23 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
}
}
+ if (mi->max_prob_streams < 1)
+ mi->max_prob_streams = 1;
+
+ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
+ mg = &mi->groups[group];
+ if (!mg->supported)
+ continue;
+ mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
+ if (cur_prob_tp < mr->cur_tp &&
+ minstrel_mcs_groups[group].streams <= mi->max_prob_streams) {
+ mi->max_prob_rate = mg->max_prob_rate;
+ cur_prob = mr->cur_prob;
+ cur_prob_tp = mr->cur_tp;
+ }
+ }
+
+
mi->stats_update = jiffies;
}
@@ -445,7 +468,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
struct ieee80211_tx_rate *ar = info->status.rates;
struct minstrel_rate_stats *rate, *rate2;
struct minstrel_priv *mp = priv;
- bool last;
+ bool last, update = false;
int i;
if (!msp->is_ht)
@@ -467,7 +490,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
- mi->sample_tries = 2;
+ mi->sample_tries = 1;
mi->sample_count--;
}
@@ -494,21 +517,29 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
rate = minstrel_get_ratestats(mi, mi->max_tp_rate);
if (rate->attempts > 30 &&
MINSTREL_FRAC(rate->success, rate->attempts) <
- MINSTREL_FRAC(20, 100))
+ MINSTREL_FRAC(20, 100)) {
minstrel_downgrade_rate(mi, &mi->max_tp_rate, true);
+ update = true;
+ }
rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2);
if (rate2->attempts > 30 &&
MINSTREL_FRAC(rate2->success, rate2->attempts) <
- MINSTREL_FRAC(20, 100))
+ MINSTREL_FRAC(20, 100)) {
minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false);
+ update = true;
+ }
if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) {
+ update = true;
minstrel_ht_update_stats(mp, mi);
if (!(info->flags & IEEE80211_TX_CTL_AMPDU) &&
mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP)
minstrel_aggr_check(sta, skb);
}
+
+ if (update)
+ minstrel_ht_update_rates(mp, mi);
}
static void
@@ -536,7 +567,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
mr->retry_updated = true;
group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
- tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len;
+ tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000;
/* Contention time for first 2 tries */
ctime = (t_slot * cw) >> 1;
@@ -572,36 +603,71 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
static void
minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
- struct ieee80211_tx_rate *rate, int index,
- bool sample, bool rtscts)
+ struct ieee80211_sta_rates *ratetbl, int offset, int index)
{
const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
struct minstrel_rate_stats *mr;
+ u8 idx;
+ u16 flags;
mr = minstrel_get_ratestats(mi, index);
if (!mr->retry_updated)
minstrel_calc_retransmit(mp, mi, index);
- if (sample)
- rate->count = 1;
- else if (mr->probability < MINSTREL_FRAC(20, 100))
- rate->count = 2;
- else if (rtscts)
- rate->count = mr->retry_count_rtscts;
- else
- rate->count = mr->retry_count;
-
- rate->flags = 0;
- if (rtscts)
- rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS;
+ if (mr->probability < MINSTREL_FRAC(20, 100) || !mr->retry_count) {
+ ratetbl->rate[offset].count = 2;
+ ratetbl->rate[offset].count_rts = 2;
+ ratetbl->rate[offset].count_cts = 2;
+ } else {
+ ratetbl->rate[offset].count = mr->retry_count;
+ ratetbl->rate[offset].count_cts = mr->retry_count;
+ ratetbl->rate[offset].count_rts = mr->retry_count_rtscts;
+ }
if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
- rate->idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)];
+ idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)];
+ flags = 0;
+ } else {
+ idx = index % MCS_GROUP_RATES +
+ (group->streams - 1) * MCS_GROUP_RATES;
+ flags = IEEE80211_TX_RC_MCS | group->flags;
+ }
+
+ if (offset > 0) {
+ ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
+ flags |= IEEE80211_TX_RC_USE_RTS_CTS;
+ }
+
+ ratetbl->rate[offset].idx = idx;
+ ratetbl->rate[offset].flags = flags;
+}
+
+static void
+minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
+{
+ struct ieee80211_sta_rates *rates;
+ int i = 0;
+
+ rates = kzalloc(sizeof(*rates), GFP_ATOMIC);
+ if (!rates)
return;
+
+ /* Start with max_tp_rate */
+ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate);
+
+ if (mp->hw->max_rates >= 3) {
+ /* At least 3 tx rates supported, use max_tp_rate2 next */
+ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate2);
}
- rate->flags |= IEEE80211_TX_RC_MCS | group->flags;
- rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES;
+ if (mp->hw->max_rates >= 2) {
+ /*
+ * At least 2 tx rates supported, use max_prob_rate next */
+ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
+ }
+
+ rates->rate[i].idx = -1;
+ rate_control_set_rates(mp->hw, mi->sta, rates);
}
static inline int
@@ -616,6 +682,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct minstrel_rate_stats *mr;
struct minstrel_mcs_group_data *mg;
+ unsigned int sample_dur, sample_group;
int sample_idx = 0;
if (mi->sample_wait > 0) {
@@ -626,39 +693,46 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
if (!mi->sample_tries)
return -1;
- mi->sample_tries--;
mg = &mi->groups[mi->sample_group];
sample_idx = sample_table[mg->column][mg->index];
mr = &mg->rates[sample_idx];
- sample_idx += mi->sample_group * MCS_GROUP_RATES;
+ sample_group = mi->sample_group;
+ sample_idx += sample_group * MCS_GROUP_RATES;
minstrel_next_sample_idx(mi);
/*
* Sampling might add some overhead (RTS, no aggregation)
* to the frame. Hence, don't use sampling for the currently
- * used max TP rate.
+ * used rates.
*/
- if (sample_idx == mi->max_tp_rate)
+ if (sample_idx == mi->max_tp_rate ||
+ sample_idx == mi->max_tp_rate2 ||
+ sample_idx == mi->max_prob_rate)
return -1;
+
/*
- * When not using MRR, do not sample if the probability is already
- * higher than 95% to avoid wasting airtime
+ * Do not sample if the probability is already higher than 95%
+ * to avoid wasting airtime.
*/
- if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100)))
+ if (mr->probability > MINSTREL_FRAC(95, 100))
return -1;
/*
* Make sure that lower rates get sampled only occasionally,
* if the link is working perfectly.
*/
- if (minstrel_get_duration(sample_idx) >
- minstrel_get_duration(mi->max_tp_rate)) {
+ sample_dur = minstrel_get_duration(sample_idx);
+ if (sample_dur >= minstrel_get_duration(mi->max_tp_rate2) &&
+ (mi->max_prob_streams <
+ minstrel_mcs_groups[sample_group].streams ||
+ sample_dur >= minstrel_get_duration(mi->max_prob_rate))) {
if (mr->sample_skipped < 20)
return -1;
if (mi->sample_slow++ > 2)
return -1;
}
+ mi->sample_tries--;
return sample_idx;
}
@@ -683,13 +757,13 @@ static void
minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
struct ieee80211_tx_rate_control *txrc)
{
+ const struct mcs_group *sample_group;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
- struct ieee80211_tx_rate *ar = info->status.rates;
+ struct ieee80211_tx_rate *rate = &info->status.rates[0];
struct minstrel_ht_sta_priv *msp = priv_sta;
struct minstrel_ht_sta *mi = &msp->ht;
struct minstrel_priv *mp = priv;
int sample_idx;
- bool sample = false;
if (rate_control_send_low(sta, priv_sta, txrc))
return;
@@ -717,51 +791,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
}
#endif
- if (sample_idx >= 0) {
- sample = true;
- minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx,
- true, false);
- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
- } else {
- minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate,
- false, false);
- }
-
- if (mp->hw->max_rates >= 3) {
- /*
- * At least 3 tx rates supported, use
- * sample_rate -> max_tp_rate -> max_prob_rate for sampling and
- * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default.
- */
- if (sample_idx >= 0)
- minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
- false, false);
- else
- minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
- false, true);
-
- minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate,
- false, !sample);
-
- ar[3].count = 0;
- ar[3].idx = -1;
- } else if (mp->hw->max_rates == 2) {
- /*
- * Only 2 tx rates supported, use
- * sample_rate -> max_prob_rate for sampling and
- * max_tp_rate -> max_prob_rate by default.
- */
- minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate,
- false, !sample);
-
- ar[2].count = 0;
- ar[2].idx = -1;
- } else {
- /* Not using MRR, only use the first rate */
- ar[1].count = 0;
- ar[1].idx = -1;
- }
-
mi->total_packets++;
/* wraparound */
@@ -769,6 +798,16 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
mi->total_packets = 0;
mi->sample_packets = 0;
}
+
+ if (sample_idx < 0)
+ return;
+
+ sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
+ info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+ rate->idx = sample_idx % MCS_GROUP_RATES +
+ (sample_group->streams - 1) * MCS_GROUP_RATES;
+ rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags;
+ rate->count = 1;
}
static void
@@ -818,6 +857,8 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
msp->is_ht = true;
memset(mi, 0, sizeof(*mi));
+
+ mi->sta = sta;
mi->stats_update = jiffies;
ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1);
@@ -879,6 +920,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
if (!n_supported)
goto use_legacy;
+ /* create an initial rate table with the lowest supported rates */
+ minstrel_ht_update_stats(mp, mi);
+ minstrel_ht_update_rates(mp, mi);
+
return;
use_legacy:
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index 302dbd52180d..d655586773ac 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -16,11 +16,6 @@
#define MINSTREL_MAX_STREAMS 3
#define MINSTREL_STREAM_GROUPS 4
-/* scaled fraction values */
-#define MINSTREL_SCALE 16
-#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
-#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
-
#define MCS_GROUP_RATES 8
struct mcs_group {
@@ -70,6 +65,8 @@ struct minstrel_mcs_group_data {
};
struct minstrel_ht_sta {
+ struct ieee80211_sta *sta;
+
/* ampdu length (average, per sampling interval) */
unsigned int ampdu_len;
unsigned int ampdu_packets;
@@ -85,6 +82,7 @@ struct minstrel_ht_sta {
/* best probability rate */
unsigned int max_prob_rate;
+ unsigned int max_prob_streams;
/* time of last status update */
unsigned long stats_update;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c6844ad080be..8e2952620256 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -648,24 +648,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
-#define SEQ_MODULO 0x1000
-#define SEQ_MASK 0xfff
-
-static inline int seq_less(u16 sq1, u16 sq2)
-{
- return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1);
-}
-
-static inline u16 seq_inc(u16 sq)
-{
- return (sq + 1) & SEQ_MASK;
-}
-
-static inline u16 seq_sub(u16 sq1, u16 sq2)
-{
- return (sq1 - sq2) & SEQ_MASK;
-}
-
static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
struct tid_ampdu_rx *tid_agg_rx,
int index,
@@ -687,7 +669,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
__skb_queue_tail(frames, skb);
no_frame:
- tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
+ tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
}
static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata,
@@ -699,8 +681,9 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata
lockdep_assert_held(&tid_agg_rx->reorder_lock);
- while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) {
- index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
+ while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) {
+ index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+ tid_agg_rx->ssn) %
tid_agg_rx->buf_size;
ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
frames);
@@ -727,8 +710,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
lockdep_assert_held(&tid_agg_rx->reorder_lock);
/* release the buffer until next missing frame */
- index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
- tid_agg_rx->buf_size;
+ index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+ tid_agg_rx->ssn) % tid_agg_rx->buf_size;
if (!tid_agg_rx->reorder_buf[index] &&
tid_agg_rx->stored_mpdu_num) {
/*
@@ -756,19 +739,22 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
* Increment the head seq# also for the skipped slots.
*/
tid_agg_rx->head_seq_num =
- (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
+ (tid_agg_rx->head_seq_num +
+ skipped) & IEEE80211_SN_MASK;
skipped = 0;
}
} else while (tid_agg_rx->reorder_buf[index]) {
ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
frames);
- index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
+ index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+ tid_agg_rx->ssn) %
tid_agg_rx->buf_size;
}
if (tid_agg_rx->stored_mpdu_num) {
- j = index = seq_sub(tid_agg_rx->head_seq_num,
- tid_agg_rx->ssn) % tid_agg_rx->buf_size;
+ j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
+ tid_agg_rx->ssn) %
+ tid_agg_rx->buf_size;
for (; j != (index - 1) % tid_agg_rx->buf_size;
j = (j + 1) % tid_agg_rx->buf_size) {
@@ -809,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
head_seq_num = tid_agg_rx->head_seq_num;
/* frame with out of date sequence number */
- if (seq_less(mpdu_seq_num, head_seq_num)) {
+ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
dev_kfree_skb(skb);
goto out;
}
@@ -818,8 +804,9 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
* If frame the sequence number exceeds our buffering window
* size release some previous frames to make room for this one.
*/
- if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) {
- head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size));
+ if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) {
+ head_seq_num = ieee80211_sn_inc(
+ ieee80211_sn_sub(mpdu_seq_num, buf_size));
/* release stored frames up to new head to stack */
ieee80211_release_reorder_frames(sdata, tid_agg_rx,
head_seq_num, frames);
@@ -827,7 +814,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
/* Now the new frame is always in the range of the reordering buffer */
- index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size;
+ index = ieee80211_sn_sub(mpdu_seq_num,
+ tid_agg_rx->ssn) % tid_agg_rx->buf_size;
/* check if we already stored this frame */
if (tid_agg_rx->reorder_buf[index]) {
@@ -843,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
*/
if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
tid_agg_rx->stored_mpdu_num == 0) {
- tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
+ tid_agg_rx->head_seq_num =
+ ieee80211_sn_inc(tid_agg_rx->head_seq_num);
ret = false;
goto out;
}
@@ -1894,8 +1883,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
* 'align' will only take the values 0 or 2 here
* since all frames are required to be aligned
* to 2-byte boundaries when being passed to
- * mac80211. That also explains the __skb_push()
- * below.
+ * mac80211; the code here works just as well if
+ * that isn't true, but mac80211 assumes it can
+ * access fields as 2-byte aligned (e.g. for
+ * compare_ether_addr)
*/
align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3;
if (align) {
@@ -2094,6 +2085,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
}
fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
+ fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
info = IEEE80211_SKB_CB(fwd_skb);
memset(info, 0, sizeof(*info));
info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
@@ -2432,6 +2424,22 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
}
break;
+ case WLAN_CATEGORY_PUBLIC:
+ if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+ goto invalid;
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ break;
+ if (!rx->sta)
+ break;
+ if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid))
+ break;
+ if (mgmt->u.action.u.ext_chan_switch.action_code !=
+ WLAN_PUB_ACTION_EXT_CHANSW_ANN)
+ break;
+ if (len < offsetof(struct ieee80211_mgmt,
+ u.action.u.ext_chan_switch.variable))
+ goto invalid;
+ goto queue;
case WLAN_CATEGORY_VHT:
if (sdata->vif.type != NL80211_IFTYPE_STATION &&
sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
@@ -2515,10 +2523,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
ieee80211_process_measurement_req(sdata, mgmt, len);
goto handled;
case WLAN_ACTION_SPCT_CHL_SWITCH:
- if (len < (IEEE80211_MIN_ACTION_SIZE +
- sizeof(mgmt->u.action.u.chan_switch)))
- break;
-
if (sdata->vif.type != NL80211_IFTYPE_STATION)
break;
@@ -2552,7 +2556,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
case WLAN_SP_MESH_PEERING_CONFIRM:
if (!ieee80211_vif_is_mesh(&sdata->vif))
goto invalid;
- if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
+ if (sdata->u.mesh.user_mpm)
/* userspace handles this frame */
break;
goto queue;
@@ -3032,6 +3036,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
* and location updates. Note that mac80211
* itself never looks at these frames.
*/
+ if (!multicast &&
+ !ether_addr_equal(sdata->vif.addr, hdr->addr1))
+ return 0;
if (ieee80211_is_public_action(hdr, skb->len))
return 1;
if (!ieee80211_is_beacon(hdr->frame_control))
@@ -3051,7 +3058,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
!ieee80211_is_probe_resp(hdr->frame_control) &&
!ieee80211_is_beacon(hdr->frame_control))
return 0;
- if (!ether_addr_equal(sdata->vif.addr, hdr->addr1))
+ if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) &&
+ !multicast)
status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
break;
default:
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 43a45cf00e06..99b103921a4b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -98,9 +98,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
}
/* save the ERP value so that it is available at association time */
- if (elems->erp_info && elems->erp_info_len >= 1 &&
- (!elems->parse_error ||
- !(bss->valid_data & IEEE80211_BSS_VALID_ERP))) {
+ if (elems->erp_info && (!elems->parse_error ||
+ !(bss->valid_data & IEEE80211_BSS_VALID_ERP))) {
bss->erp_value = elems->erp_info[0];
bss->has_erp_value = true;
if (!elems->parse_error)
@@ -153,7 +152,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
u8 *elements;
struct ieee80211_channel *channel;
size_t baselen;
- bool beacon;
struct ieee802_11_elems elems;
if (skb->len < 24 ||
@@ -175,17 +173,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
elements = mgmt->u.probe_resp.variable;
baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable);
- beacon = false;
} else {
baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);
elements = mgmt->u.beacon.variable;
- beacon = true;
}
if (baselen > skb->len)
return;
- ieee802_11_parse_elems(elements, skb->len - baselen, &elems);
+ ieee802_11_parse_elems(elements, skb->len - baselen, false, &elems);
channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
@@ -335,7 +331,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
ieee80211_offchannel_stop_vifs(local);
/* ensure nullfunc is transmitted before leaving operating channel */
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
ieee80211_configure_filter(local);
@@ -387,7 +383,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
{
int i;
struct ieee80211_sub_if_data *sdata;
- enum ieee80211_band band = local->hw.conf.channel->band;
+ enum ieee80211_band band = local->hw.conf.chandef.chan->band;
u32 tx_flags;
tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
@@ -404,7 +400,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
local->scan_req->ssids[i].ssid_len,
local->scan_req->ie, local->scan_req->ie_len,
local->scan_req->rates[band], false,
- tx_flags, local->hw.conf.channel, true);
+ tx_flags, local->hw.conf.chandef.chan, true);
/*
* After sending probe requests, wait for probe responses
@@ -470,7 +466,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (local->ops->hw_scan) {
__set_bit(SCAN_HW_SCANNING, &local->scanning);
} else if ((req->n_channels == 1) &&
- (req->channels[0] == local->_oper_channel)) {
+ (req->channels[0] == local->_oper_chandef.chan)) {
/*
* If we are scanning only on the operating channel
* then we do not need to stop normal activities
@@ -671,7 +667,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local,
ieee80211_offchannel_stop_vifs(local);
if (local->ops->flush) {
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
*next_delay = 0;
} else
*next_delay = HZ / 10;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 238a0cca320e..11216bc13b27 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -342,6 +342,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
+#ifdef CONFIG_MAC80211_MESH
+ if (ieee80211_vif_is_mesh(&sdata->vif) &&
+ !sdata->u.mesh.user_mpm)
+ init_timer(&sta->plink_timer);
+#endif
memcpy(sta->sta.addr, addr, ETH_ALEN);
sta->local = local;
@@ -551,6 +556,15 @@ static inline void __bss_tim_clear(u8 *tim, u16 id)
tim[id / 8] &= ~(1 << (id % 8));
}
+static inline bool __bss_tim_get(u8 *tim, u16 id)
+{
+ /*
+ * This format has been mandated by the IEEE specifications,
+ * so this line may not be changed to use the test_bit() format.
+ */
+ return tim[id / 8] & (1 << (id % 8));
+}
+
static unsigned long ieee80211_tids_for_ac(int ac)
{
/* If we ever support TIDs > 7, this obviously needs to be adjusted */
@@ -631,6 +645,9 @@ void sta_info_recalc_tim(struct sta_info *sta)
done:
spin_lock_bh(&local->tim_lock);
+ if (indicate_tim == __bss_tim_get(ps->tim, id))
+ goto out_unlock;
+
if (indicate_tim)
__bss_tim_set(ps->tim, id);
else
@@ -642,6 +659,7 @@ void sta_info_recalc_tim(struct sta_info *sta)
local->tim_in_locked_section = false;
}
+out_unlock:
spin_unlock_bh(&local->tim_lock);
}
@@ -765,8 +783,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
{
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
- int ret, i;
- bool have_key = false;
+ int ret;
might_sleep();
@@ -793,19 +810,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
list_del_rcu(&sta->list);
- mutex_lock(&local->key_mtx);
- for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
- __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i]));
- have_key = true;
- }
- if (sta->ptk) {
- __ieee80211_key_free(key_mtx_dereference(local, sta->ptk));
- have_key = true;
- }
- mutex_unlock(&local->key_mtx);
-
- if (!have_key)
- synchronize_net();
+ /* this always calls synchronize_net() */
+ ieee80211_free_sta_keys(local, sta);
sta->dead = true;
@@ -1391,30 +1397,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_sta_block_awake);
-void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta)
+void ieee80211_sta_eosp(struct ieee80211_sta *pubsta)
{
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
struct ieee80211_local *local = sta->local;
- struct sk_buff *skb;
- struct skb_eosp_msg_data *data;
trace_api_eosp(local, pubsta);
- skb = alloc_skb(0, GFP_ATOMIC);
- if (!skb) {
- /* too bad ... but race is better than loss */
- clear_sta_flag(sta, WLAN_STA_SP);
- return;
- }
-
- data = (void *)skb->cb;
- memcpy(data->sta, pubsta->addr, ETH_ALEN);
- memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN);
- skb->pkt_type = IEEE80211_EOSP_MSG;
- skb_queue_tail(&local->skb_queue, skb);
- tasklet_schedule(&local->tasklet);
+ clear_sta_flag(sta, WLAN_STA_SP);
}
-EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe);
+EXPORT_SYMBOL(ieee80211_sta_eosp);
void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
u8 tid, bool buffered)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4947341a2a82..adc30045f99e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -281,7 +281,6 @@ struct sta_ampdu_mlme {
* @plink_state: peer link state
* @plink_timeout: timeout of peer link
* @plink_timer: peer link watch timer
- * @plink_timer_was_running: used by suspend/resume to restore timers
* @t_offset: timing offset relative to this host
* @t_offset_setpoint: reference timing offset of this sta to be used when
* calculating clockdrift
@@ -334,7 +333,8 @@ struct sta_info {
unsigned long driver_buffered_tids;
/* Updated from RX path only, no locking requirements */
- unsigned long rx_packets, rx_bytes;
+ unsigned long rx_packets;
+ u64 rx_bytes;
unsigned long wep_weak_iv_count;
unsigned long last_rx;
long last_connected;
@@ -354,9 +354,9 @@ struct sta_info {
unsigned int fail_avg;
/* Updated from TX path only, no locking requirements */
- unsigned long tx_packets;
- unsigned long tx_bytes;
- unsigned long tx_fragments;
+ u32 tx_fragments;
+ u64 tx_packets[IEEE80211_NUM_ACS];
+ u64 tx_bytes[IEEE80211_NUM_ACS];
struct ieee80211_tx_rate last_tx_rate;
int last_rx_rate_idx;
u32 last_rx_rate_flag;
@@ -379,7 +379,6 @@ struct sta_info {
__le16 reason;
u8 plink_retries;
bool ignore_plink_timer;
- bool plink_timer_was_running;
enum nl80211_plink_state plink_state;
u32 plink_timeout;
struct timer_list plink_timer;
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 3ed801d90f1e..124b1fdc20d0 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -208,10 +208,10 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf,
u32 iv32 = get_unaligned_le32(&data[4]);
u16 iv16 = data[2] | (data[0] << 8);
- spin_lock_bh(&key->u.tkip.txlock);
+ spin_lock(&key->u.tkip.txlock);
ieee80211_compute_tkip_p1k(key, iv32);
tkip_mixing_phase2(tk, ctx, iv16, p2k);
- spin_unlock_bh(&key->u.tkip.txlock);
+ spin_unlock(&key->u.tkip.txlock);
}
EXPORT_SYMBOL(ieee80211_get_tkip_p2k);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d7cd2a0582f..c215fafd7a2f 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -28,27 +28,27 @@
#define VIF_PR_FMT " vif:%s(%d%s)"
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
-#define CHANDEF_ENTRY __field(u32, control_freq) \
- __field(u32, chan_width) \
- __field(u32, center_freq1) \
+#define CHANDEF_ENTRY __field(u32, control_freq) \
+ __field(u32, chan_width) \
+ __field(u32, center_freq1) \
__field(u32, center_freq2)
-#define CHANDEF_ASSIGN(c) \
- __entry->control_freq = (c)->chan->center_freq; \
- __entry->chan_width = (c)->width; \
- __entry->center_freq1 = (c)->center_freq1; \
+#define CHANDEF_ASSIGN(c) \
+ __entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0; \
+ __entry->chan_width = (c)->width; \
+ __entry->center_freq1 = (c)->center_freq1; \
__entry->center_freq2 = (c)->center_freq2;
#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz"
-#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \
+#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \
__entry->center_freq1, __entry->center_freq2
-#define CHANCTX_ENTRY CHANDEF_ENTRY \
- __field(u8, rx_chains_static) \
+#define CHANCTX_ENTRY CHANDEF_ENTRY \
+ __field(u8, rx_chains_static) \
__field(u8, rx_chains_dynamic)
-#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \
- __entry->rx_chains_static = ctx->conf.rx_chains_static; \
+#define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \
+ __entry->rx_chains_static = ctx->conf.rx_chains_static; \
__entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic
#define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d"
-#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \
+#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \
__entry->rx_chains_static, __entry->rx_chains_dynamic
@@ -286,8 +286,7 @@ TRACE_EVENT(drv_config,
__field(u16, listen_interval)
__field(u8, long_frame_max_tx_count)
__field(u8, short_frame_max_tx_count)
- __field(int, center_freq)
- __field(int, channel_type)
+ CHANDEF_ENTRY
__field(int, smps)
),
@@ -303,15 +302,13 @@ TRACE_EVENT(drv_config,
local->hw.conf.long_frame_max_tx_count;
__entry->short_frame_max_tx_count =
local->hw.conf.short_frame_max_tx_count;
- __entry->center_freq = local->hw.conf.channel ?
- local->hw.conf.channel->center_freq : 0;
- __entry->channel_type = local->hw.conf.channel_type;
+ CHANDEF_ASSIGN(&local->hw.conf.chandef)
__entry->smps = local->hw.conf.smps_mode;
),
TP_printk(
- LOCAL_PR_FMT " ch:%#x freq:%d",
- LOCAL_PR_ARG, __entry->changed, __entry->center_freq
+ LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT,
+ LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG
)
);
@@ -359,8 +356,7 @@ TRACE_EVENT(drv_bss_info_changed,
__dynamic_array(u8, ssid, info->ssid_len);
__field(bool, hidden_ssid);
__field(int, txpower)
- __field(u8, p2p_ctwindow)
- __field(bool, p2p_oppps)
+ __field(u8, p2p_oppps_ctwindow)
),
TP_fast_assign(
@@ -400,8 +396,7 @@ TRACE_EVENT(drv_bss_info_changed,
memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len);
__entry->hidden_ssid = info->hidden_ssid;
__entry->txpower = info->txpower;
- __entry->p2p_ctwindow = info->p2p_ctwindow;
- __entry->p2p_oppps = info->p2p_oppps;
+ __entry->p2p_oppps_ctwindow = info->p2p_noa_attr.oppps_ctwindow;
),
TP_printk(
@@ -431,6 +426,30 @@ TRACE_EVENT(drv_prepare_multicast,
)
);
+TRACE_EVENT(drv_set_multicast_list,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata, int mc_count),
+
+ TP_ARGS(local, sdata, mc_count),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(bool, allmulti)
+ __field(int, mc_count)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+ __entry->mc_count = mc_count;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d",
+ LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti
+ )
+);
+
TRACE_EVENT(drv_configure_filter,
TP_PROTO(struct ieee80211_local *local,
unsigned int changed_flags,
@@ -940,23 +959,26 @@ TRACE_EVENT(drv_get_survey,
);
TRACE_EVENT(drv_flush,
- TP_PROTO(struct ieee80211_local *local, bool drop),
+ TP_PROTO(struct ieee80211_local *local,
+ u32 queues, bool drop),
- TP_ARGS(local, drop),
+ TP_ARGS(local, queues, drop),
TP_STRUCT__entry(
LOCAL_ENTRY
__field(bool, drop)
+ __field(u32, queues)
),
TP_fast_assign(
LOCAL_ASSIGN;
__entry->drop = drop;
+ __entry->queues = queues;
),
TP_printk(
- LOCAL_PR_FMT " drop:%d",
- LOCAL_PR_ARG, __entry->drop
+ LOCAL_PR_FMT " queues:0x%x drop:%d",
+ LOCAL_PR_ARG, __entry->queues, __entry->drop
)
);
@@ -968,23 +990,23 @@ TRACE_EVENT(drv_channel_switch,
TP_STRUCT__entry(
LOCAL_ENTRY
+ CHANDEF_ENTRY
__field(u64, timestamp)
__field(bool, block_tx)
- __field(u16, freq)
__field(u8, count)
),
TP_fast_assign(
LOCAL_ASSIGN;
+ CHANDEF_ASSIGN(&ch_switch->chandef)
__entry->timestamp = ch_switch->timestamp;
__entry->block_tx = ch_switch->block_tx;
- __entry->freq = ch_switch->channel->center_freq;
__entry->count = ch_switch->count;
),
TP_printk(
- LOCAL_PR_FMT " new freq:%u count:%d",
- LOCAL_PR_ARG, __entry->freq, __entry->count
+ LOCAL_PR_FMT " new " CHANDEF_PR_FMT " count:%d",
+ LOCAL_PR_ARG, CHANDEF_PR_ARG, __entry->count
)
);
@@ -1042,15 +1064,17 @@ TRACE_EVENT(drv_remain_on_channel,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *chan,
- unsigned int duration),
+ unsigned int duration,
+ enum ieee80211_roc_type type),
- TP_ARGS(local, sdata, chan, duration),
+ TP_ARGS(local, sdata, chan, duration, type),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
__field(int, center_freq)
__field(unsigned int, duration)
+ __field(u32, type)
),
TP_fast_assign(
@@ -1058,12 +1082,13 @@ TRACE_EVENT(drv_remain_on_channel,
VIF_ASSIGN;
__entry->center_freq = chan->center_freq;
__entry->duration = duration;
+ __entry->type = type;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms",
+ LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d",
LOCAL_PR_ARG, VIF_PR_ARG,
- __entry->center_freq, __entry->duration
+ __entry->center_freq, __entry->duration, __entry->type
)
);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8914d2d2881a..9972e07a2f96 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -48,15 +48,15 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
/* assume HW handles this */
- if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS)
+ if (tx->rate.flags & IEEE80211_TX_RC_MCS)
return 0;
/* uh huh? */
- if (WARN_ON_ONCE(info->control.rates[0].idx < 0))
+ if (WARN_ON_ONCE(tx->rate.idx < 0))
return 0;
sband = local->hw.wiphy->bands[info->band];
- txrate = &sband->bitrates[info->control.rates[0].idx];
+ txrate = &sband->bitrates[tx->rate.idx];
erp = txrate->flags & IEEE80211_RATE_ERP_G;
@@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
ieee80211_stop_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_PS);
ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
ieee80211_queue_work(&local->hw,
@@ -616,11 +617,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (void *)tx->skb->data;
struct ieee80211_supported_band *sband;
- struct ieee80211_rate *rate;
- int i;
u32 len;
- bool inval = false, rts = false, short_preamble = false;
struct ieee80211_tx_rate_control txrc;
+ struct ieee80211_sta_rates *ratetbl = NULL;
bool assoc = false;
memset(&txrc, 0, sizeof(txrc));
@@ -641,18 +640,23 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.max_rate_idx = -1;
else
txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
- memcpy(txrc.rate_idx_mcs_mask,
- tx->sdata->rc_rateidx_mcs_mask[info->band],
- sizeof(txrc.rate_idx_mcs_mask));
+
+ if (tx->sdata->rc_has_mcs_mask[info->band])
+ txrc.rate_idx_mcs_mask =
+ tx->sdata->rc_rateidx_mcs_mask[info->band];
+
txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
tx->sdata->vif.type == NL80211_IFTYPE_ADHOC);
/* set up RTS protection if desired */
if (len > tx->local->hw.wiphy->rts_threshold) {
- txrc.rts = rts = true;
+ txrc.rts = true;
}
+ info->control.use_rts = txrc.rts;
+ info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot;
+
/*
* Use short preamble if the BSS can handle it, but not for
* management frames unless we know the receiver can handle
@@ -662,7 +666,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
if (tx->sdata->vif.bss_conf.use_short_preamble &&
(ieee80211_is_data(hdr->frame_control) ||
(tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE))))
- txrc.short_preamble = short_preamble = true;
+ txrc.short_preamble = true;
+
+ info->control.short_preamble = txrc.short_preamble;
if (tx->sta)
assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC);
@@ -686,16 +692,38 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
*/
rate_control_get_rate(tx->sdata, tx->sta, &txrc);
- if (unlikely(info->control.rates[0].idx < 0))
- return TX_DROP;
+ if (tx->sta && !info->control.skip_table)
+ ratetbl = rcu_dereference(tx->sta->sta.rates);
+
+ if (unlikely(info->control.rates[0].idx < 0)) {
+ if (ratetbl) {
+ struct ieee80211_tx_rate rate = {
+ .idx = ratetbl->rate[0].idx,
+ .flags = ratetbl->rate[0].flags,
+ .count = ratetbl->rate[0].count
+ };
+
+ if (ratetbl->rate[0].idx < 0)
+ return TX_DROP;
+
+ tx->rate = rate;
+ } else {
+ return TX_DROP;
+ }
+ } else {
+ tx->rate = info->control.rates[0];
+ }
if (txrc.reported_rate.idx < 0) {
- txrc.reported_rate = info->control.rates[0];
+ txrc.reported_rate = tx->rate;
if (tx->sta && ieee80211_is_data(hdr->frame_control))
tx->sta->last_tx_rate = txrc.reported_rate;
} else if (tx->sta)
tx->sta->last_tx_rate = txrc.reported_rate;
+ if (ratetbl)
+ return TX_CONTINUE;
+
if (unlikely(!info->control.rates[0].count))
info->control.rates[0].count = 1;
@@ -703,91 +731,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
(info->flags & IEEE80211_TX_CTL_NO_ACK)))
info->control.rates[0].count = 1;
- if (is_multicast_ether_addr(hdr->addr1)) {
- /*
- * XXX: verify the rate is in the basic rateset
- */
- return TX_CONTINUE;
- }
-
- /*
- * set up the RTS/CTS rate as the fastest basic rate
- * that is not faster than the data rate
- *
- * XXX: Should this check all retry rates?
- */
- if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) {
- s8 baserate = 0;
-
- rate = &sband->bitrates[info->control.rates[0].idx];
-
- for (i = 0; i < sband->n_bitrates; i++) {
- /* must be a basic rate */
- if (!(tx->sdata->vif.bss_conf.basic_rates & BIT(i)))
- continue;
- /* must not be faster than the data rate */
- if (sband->bitrates[i].bitrate > rate->bitrate)
- continue;
- /* maximum */
- if (sband->bitrates[baserate].bitrate <
- sband->bitrates[i].bitrate)
- baserate = i;
- }
-
- info->control.rts_cts_rate_idx = baserate;
- }
-
- for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
- /*
- * make sure there's no valid rate following
- * an invalid one, just in case drivers don't
- * take the API seriously to stop at -1.
- */
- if (inval) {
- info->control.rates[i].idx = -1;
- continue;
- }
- if (info->control.rates[i].idx < 0) {
- inval = true;
- continue;
- }
-
- /*
- * For now assume MCS is already set up correctly, this
- * needs to be fixed.
- */
- if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) {
- WARN_ON(info->control.rates[i].idx > 76);
- continue;
- }
-
- /* set up RTS protection if desired */
- if (rts)
- info->control.rates[i].flags |=
- IEEE80211_TX_RC_USE_RTS_CTS;
-
- /* RC is busted */
- if (WARN_ON_ONCE(info->control.rates[i].idx >=
- sband->n_bitrates)) {
- info->control.rates[i].idx = -1;
- continue;
- }
-
- rate = &sband->bitrates[info->control.rates[i].idx];
-
- /* set up short preamble */
- if (short_preamble &&
- rate->flags & IEEE80211_RATE_SHORT_PREAMBLE)
- info->control.rates[i].flags |=
- IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
-
- /* set up G protection */
- if (!rts && tx->sdata->vif.bss_conf.use_cts_prot &&
- rate->flags & IEEE80211_RATE_ERP_G)
- info->control.rates[i].flags |=
- IEEE80211_TX_RC_USE_CTS_PROTECT;
- }
-
return TX_CONTINUE;
}
@@ -991,15 +934,18 @@ static ieee80211_tx_result debug_noinline
ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
{
struct sk_buff *skb;
+ int ac = -1;
if (!tx->sta)
return TX_CONTINUE;
- tx->sta->tx_packets++;
skb_queue_walk(&tx->skbs, skb) {
+ ac = skb_get_queue_mapping(skb);
tx->sta->tx_fragments++;
- tx->sta->tx_bytes += skb->len;
+ tx->sta->tx_bytes[ac] += skb->len;
}
+ if (ac >= 0)
+ tx->sta->tx_packets[ac]++;
return TX_CONTINUE;
}
@@ -1705,7 +1651,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
if (chanctx_conf)
chan = chanctx_conf->def.chan;
else if (!local->use_chanctx)
- chan = local->_oper_channel;
+ chan = local->_oper_chandef.chan;
else
goto fail_rcu;
@@ -1839,7 +1785,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
* This is the exception! WDS style interfaces are prohibited
* when channel contexts are in used so this must be valid
*/
- band = local->hw.conf.channel->band;
+ band = local->hw.conf.chandef.chan->band;
break;
#ifdef CONFIG_MAC80211_MESH
case NL80211_IFTYPE_MESH_POINT:
@@ -2085,7 +2031,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
encaps_data = bridge_tunnel_header;
encaps_len = sizeof(bridge_tunnel_header);
skip_header_bytes -= 2;
- } else if (ethertype >= 0x600) {
+ } else if (ethertype >= ETH_P_802_3_MIN) {
encaps_data = rfc1042_header;
encaps_len = sizeof(rfc1042_header);
skip_header_bytes -= 2;
@@ -2438,14 +2384,17 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_hdr *hdr;
- struct sk_buff *presp = rcu_dereference(ifibss->presp);
+ struct beacon_data *presp = rcu_dereference(ifibss->presp);
if (!presp)
goto out;
- skb = skb_copy(presp, GFP_ATOMIC);
+ skb = dev_alloc_skb(local->tx_headroom + presp->head_len);
if (!skb)
goto out;
+ skb_reserve(skb, local->tx_headroom);
+ memcpy(skb_put(skb, presp->head_len), presp->head,
+ presp->head_len);
hdr = (struct ieee80211_hdr *) skb->data;
hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
@@ -2495,8 +2444,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
txrc.max_rate_idx = -1;
else
txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
- memcpy(txrc.rate_idx_mcs_mask, sdata->rc_rateidx_mcs_mask[band],
- sizeof(txrc.rate_idx_mcs_mask));
txrc.bss = true;
rate_control_get_rate(sdata, NULL, &txrc);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0f38f43ac62e..72e6292955bb 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
}
void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
- enum queue_stop_reason reason)
+ unsigned long queues,
+ enum queue_stop_reason reason)
{
struct ieee80211_local *local = hw_to_local(hw);
unsigned long flags;
@@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- for (i = 0; i < hw->queues; i++)
+ for_each_set_bit(i, &queues, hw->queues)
__ieee80211_stop_queue(hw, i, reason);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
void ieee80211_stop_queues(struct ieee80211_hw *hw)
{
- ieee80211_stop_queues_by_reason(hw,
+ ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_DRIVER);
}
EXPORT_SYMBOL(ieee80211_stop_queues);
@@ -484,13 +485,15 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
return true;
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- ret = !!local->queue_stop_reasons[queue];
+ ret = test_bit(IEEE80211_QUEUE_STOP_REASON_DRIVER,
+ &local->queue_stop_reasons[queue]);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
return ret;
}
EXPORT_SYMBOL(ieee80211_queue_stopped);
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+ unsigned long queues,
enum queue_stop_reason reason)
{
struct ieee80211_local *local = hw_to_local(hw);
@@ -499,7 +502,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- for (i = 0; i < hw->queues; i++)
+ for_each_set_bit(i, &queues, hw->queues)
__ieee80211_wake_queue(hw, i, reason);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -507,10 +510,42 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
void ieee80211_wake_queues(struct ieee80211_hw *hw)
{
- ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER);
+ ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_DRIVER);
}
EXPORT_SYMBOL(ieee80211_wake_queues);
+void ieee80211_flush_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ u32 queues;
+
+ if (!local->ops->flush)
+ return;
+
+ if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
+ int ac;
+
+ queues = 0;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ queues |= BIT(sdata->vif.hw_queue[ac]);
+ if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE)
+ queues |= BIT(sdata->vif.cab_queue);
+ } else {
+ /* all queues */
+ queues = BIT(local->hw.queues) - 1;
+ }
+
+ ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH);
+
+ drv_flush(local, queues, false);
+
+ ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH);
+}
+
void ieee80211_iterate_active_interfaces(
struct ieee80211_hw *hw, u32 iter_flags,
void (*iterator)(void *data, u8 *mac,
@@ -626,14 +661,15 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_queue_delayed_work);
-u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
+u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
struct ieee802_11_elems *elems,
u64 filter, u32 crc)
{
size_t left = len;
- u8 *pos = start;
+ const u8 *pos = start;
bool calc_crc = filter != 0;
DECLARE_BITMAP(seen_elems, 256);
+ const u8 *ie;
bitmap_zero(seen_elems, 256);
memset(elems, 0, sizeof(*elems));
@@ -681,6 +717,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
case WLAN_EID_COUNTRY:
case WLAN_EID_PWR_CONSTRAINT:
case WLAN_EID_TIMEOUT_INTERVAL:
+ case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+ case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+ /*
+ * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
+ * that if the content gets bigger it might be needed more than once
+ */
if (test_bit(id, seen_elems)) {
elems->parse_error = true;
left -= elen;
@@ -704,17 +746,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
elems->supp_rates = pos;
elems->supp_rates_len = elen;
break;
- case WLAN_EID_FH_PARAMS:
- elems->fh_params = pos;
- elems->fh_params_len = elen;
- break;
case WLAN_EID_DS_PARAMS:
- elems->ds_params = pos;
- elems->ds_params_len = elen;
- break;
- case WLAN_EID_CF_PARAMS:
- elems->cf_params = pos;
- elems->cf_params_len = elen;
+ if (elen >= 1)
+ elems->ds_params = pos;
+ else
+ elem_parse_failed = true;
break;
case WLAN_EID_TIM:
if (elen >= sizeof(struct ieee80211_tim_ie)) {
@@ -723,10 +759,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
} else
elem_parse_failed = true;
break;
- case WLAN_EID_IBSS_PARAMS:
- elems->ibss_params = pos;
- elems->ibss_params_len = elen;
- break;
case WLAN_EID_CHALLENGE:
elems->challenge = pos;
elems->challenge_len = elen;
@@ -756,8 +788,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
elems->rsn_len = elen;
break;
case WLAN_EID_ERP_INFO:
- elems->erp_info = pos;
- elems->erp_info_len = elen;
+ if (elen >= 1)
+ elems->erp_info = pos;
+ else
+ elem_parse_failed = true;
break;
case WLAN_EID_EXT_SUPP_RATES:
elems->ext_supp_rates = pos;
@@ -836,12 +870,47 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
}
elems->ch_switch_ie = (void *)pos;
break;
- case WLAN_EID_QUIET:
- if (!elems->quiet_elem) {
- elems->quiet_elem = pos;
- elems->quiet_elem_len = elen;
+ case WLAN_EID_EXT_CHANSWITCH_ANN:
+ if (elen != sizeof(struct ieee80211_ext_chansw_ie)) {
+ elem_parse_failed = true;
+ break;
+ }
+ elems->ext_chansw_ie = (void *)pos;
+ break;
+ case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+ if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) {
+ elem_parse_failed = true;
+ break;
+ }
+ elems->sec_chan_offs = (void *)pos;
+ break;
+ case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+ if (!action ||
+ elen != sizeof(*elems->wide_bw_chansw_ie)) {
+ elem_parse_failed = true;
+ break;
+ }
+ elems->wide_bw_chansw_ie = (void *)pos;
+ break;
+ case WLAN_EID_CHANNEL_SWITCH_WRAPPER:
+ if (action) {
+ elem_parse_failed = true;
+ break;
+ }
+ /*
+ * This is a bit tricky, but as we only care about
+ * the wide bandwidth channel switch element, so
+ * just parse it out manually.
+ */
+ ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
+ pos, elen);
+ if (ie) {
+ if (ie[1] == sizeof(*elems->wide_bw_chansw_ie))
+ elems->wide_bw_chansw_ie =
+ (void *)(ie + 2);
+ else
+ elem_parse_failed = true;
}
- elems->num_of_quiet_elem++;
break;
case WLAN_EID_COUNTRY:
elems->country_elem = pos;
@@ -855,8 +924,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
elems->pwr_constr_elem = pos;
break;
case WLAN_EID_TIMEOUT_INTERVAL:
- elems->timeout_int = pos;
- elems->timeout_int_len = elen;
+ if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
+ elems->timeout_int = (void *)pos;
+ else
+ elem_parse_failed = true;
break;
default:
break;
@@ -877,12 +948,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
return crc;
}
-void ieee802_11_parse_elems(u8 *start, size_t len,
- struct ieee802_11_elems *elems)
-{
- ieee802_11_parse_elems_crc(start, len, elems, 0, 0);
-}
-
void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
bool bss_notify)
{
@@ -1357,6 +1422,25 @@ void ieee80211_stop_device(struct ieee80211_local *local)
drv_stop(local);
}
+static void ieee80211_assign_chanctx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_chanctx_conf *conf;
+ struct ieee80211_chanctx *ctx;
+
+ if (!local->use_chanctx)
+ return;
+
+ mutex_lock(&local->chanctx_mtx);
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (conf) {
+ ctx = container_of(conf, struct ieee80211_chanctx, conf);
+ drv_assign_vif_chanctx(local, sdata, ctx);
+ }
+ mutex_unlock(&local->chanctx_mtx);
+}
+
int ieee80211_reconfig(struct ieee80211_local *local)
{
struct ieee80211_hw *hw = &local->hw;
@@ -1421,6 +1505,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* add interfaces */
sdata = rtnl_dereference(local->monitor_sdata);
if (sdata) {
+ /* in HW restart it exists already */
+ WARN_ON(local->resuming);
res = drv_add_interface(local, sdata);
if (WARN_ON(res)) {
rcu_assign_pointer(local->monitor_sdata, NULL);
@@ -1445,36 +1531,14 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
list_for_each_entry(sdata, &local->interfaces, list) {
- struct ieee80211_chanctx_conf *ctx_conf;
-
if (!ieee80211_sdata_running(sdata))
continue;
-
- mutex_lock(&local->chanctx_mtx);
- ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (ctx_conf) {
- ctx = container_of(ctx_conf, struct ieee80211_chanctx,
- conf);
- drv_assign_vif_chanctx(local, sdata, ctx);
- }
- mutex_unlock(&local->chanctx_mtx);
+ ieee80211_assign_chanctx(local, sdata);
}
sdata = rtnl_dereference(local->monitor_sdata);
- if (sdata && local->use_chanctx && ieee80211_sdata_running(sdata)) {
- struct ieee80211_chanctx_conf *ctx_conf;
-
- mutex_lock(&local->chanctx_mtx);
- ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (ctx_conf) {
- ctx = container_of(ctx_conf, struct ieee80211_chanctx,
- conf);
- drv_assign_vif_chanctx(local, sdata, ctx);
- }
- mutex_unlock(&local->chanctx_mtx);
- }
+ if (sdata && ieee80211_sdata_running(sdata))
+ ieee80211_assign_chanctx(local, sdata);
/* add STAs back */
mutex_lock(&local->sta_mtx);
@@ -1534,11 +1598,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
BSS_CHANGED_IDLE |
BSS_CHANGED_TXPOWER;
-#ifdef CONFIG_PM
- if (local->resuming && !reconfig_due_to_wowlan)
- sdata->vif.bss_conf = sdata->suspend_bss_conf;
-#endif
-
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
changed |= BSS_CHANGED_ASSOC |
@@ -1637,6 +1696,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
local->in_reconfig = false;
barrier();
+ if (local->monitors == local->open_count && local->monitors > 0)
+ ieee80211_add_virtual_monitor(local);
+
/*
* Clear the WLAN_STA_BLOCK_BA flag so new aggregation
* sessions can be established after a resume.
@@ -1659,8 +1721,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mutex_unlock(&local->sta_mtx);
}
- ieee80211_wake_queues_by_reason(hw,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND);
/*
* If this is for hw restart things are still running.
@@ -1679,27 +1741,13 @@ int ieee80211_reconfig(struct ieee80211_local *local)
local->resuming = false;
list_for_each_entry(sdata, &local->interfaces, list) {
- switch(sdata->vif.type) {
- case NL80211_IFTYPE_STATION:
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+ if (sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_sta_restart(sdata);
- break;
- case NL80211_IFTYPE_ADHOC:
- ieee80211_ibss_restart(sdata);
- break;
- case NL80211_IFTYPE_MESH_POINT:
- ieee80211_mesh_restart(sdata);
- break;
- default:
- break;
- }
}
mod_timer(&local->sta_cleanup, jiffies + 1);
-
- mutex_lock(&local->sta_mtx);
- list_for_each_entry(sta, &local->sta_list, list)
- mesh_plink_restart(sta);
- mutex_unlock(&local->sta_mtx);
#else
WARN_ON(1);
#endif
@@ -2051,7 +2099,7 @@ int ieee80211_ave_rssi(struct ieee80211_vif *vif)
/* non-managed type inferfaces */
return 0;
}
- return ifmgd->ave_beacon_signal;
+ return ifmgd->ave_beacon_signal / 16;
}
EXPORT_SYMBOL_GPL(ieee80211_ave_rssi);
@@ -2166,8 +2214,7 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work)
/* currently not handled */
WARN_ON(1);
else {
- cfg80211_chandef_create(&chandef, local->hw.conf.channel,
- local->hw.conf.channel_type);
+ chandef = local->hw.conf.chandef;
cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
}
}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index a2c2258bc84e..171344d4eb7c 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -13,6 +13,104 @@
#include "rate.h"
+static void __check_vhtcap_disable(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta_vht_cap *vht_cap,
+ u32 flag)
+{
+ __le32 le_flag = cpu_to_le32(flag);
+
+ if (sdata->u.mgd.vht_capa_mask.vht_cap_info & le_flag &&
+ !(sdata->u.mgd.vht_capa.vht_cap_info & le_flag))
+ vht_cap->cap &= ~flag;
+}
+
+void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta_vht_cap *vht_cap)
+{
+ int i;
+ u16 rxmcs_mask, rxmcs_cap, rxmcs_n, txmcs_mask, txmcs_cap, txmcs_n;
+
+ if (!vht_cap->vht_supported)
+ return;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return;
+
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_RXLDPC);
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_SHORT_GI_80);
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_SHORT_GI_160);
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_TXSTBC);
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE);
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE);
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN);
+ __check_vhtcap_disable(sdata, vht_cap,
+ IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN);
+
+ /* Allow user to decrease AMPDU length exponent */
+ if (sdata->u.mgd.vht_capa_mask.vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK)) {
+ u32 cap, n;
+
+ n = le32_to_cpu(sdata->u.mgd.vht_capa.vht_cap_info) &
+ IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+ n >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+ cap = vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+ cap >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+
+ if (n < cap) {
+ vht_cap->cap &=
+ ~IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+ vht_cap->cap |=
+ n << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+ }
+ }
+
+ /* Allow the user to decrease MCSes */
+ rxmcs_mask =
+ le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.rx_mcs_map);
+ rxmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.rx_mcs_map);
+ rxmcs_n &= rxmcs_mask;
+ rxmcs_cap = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map);
+
+ txmcs_mask =
+ le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.tx_mcs_map);
+ txmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.tx_mcs_map);
+ txmcs_n &= txmcs_mask;
+ txmcs_cap = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map);
+ for (i = 0; i < 8; i++) {
+ u8 m, n, c;
+
+ m = (rxmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+ n = (rxmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+ c = (rxmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+ if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) ||
+ n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) {
+ rxmcs_cap &= ~(3 << 2*i);
+ rxmcs_cap |= (rxmcs_n & (3 << 2*i));
+ }
+
+ m = (txmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+ n = (txmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+ c = (txmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+ if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) ||
+ n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) {
+ txmcs_cap &= ~(3 << 2*i);
+ txmcs_cap |= (txmcs_n & (3 << 2*i));
+ }
+ }
+ vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rxmcs_cap);
+ vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(txmcs_cap);
+}
+
void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
@@ -20,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap own_cap;
+ u32 cap_info, i;
memset(vht_cap, 0, sizeof(*vht_cap));
@@ -35,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
vht_cap->vht_supported = true;
- vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info);
+ own_cap = sband->vht_cap;
+ /*
+ * If user has specified capability overrides, take care
+ * of that if the station we're setting up is the AP that
+ * we advertised a restricted capability set to. Override
+ * our own capabilities and then use those below.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ ieee80211_apply_vhtcap_overrides(sdata, &own_cap);
+
+ /* take some capabilities as-is */
+ cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info);
+ vht_cap->cap = cap_info;
+ vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 |
+ IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
+ IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
+ IEEE80211_VHT_CAP_RXLDPC |
+ IEEE80211_VHT_CAP_VHT_TXOP_PS |
+ IEEE80211_VHT_CAP_HTC_VHT |
+ IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK |
+ IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB |
+ IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB |
+ IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN |
+ IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN;
+
+ /* and some based on our own capabilities */
+ switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
+ case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
+ vht_cap->cap |= cap_info &
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
+ break;
+ case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
+ vht_cap->cap |= cap_info &
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+ break;
+ default:
+ /* nothing */
+ break;
+ }
+
+ /* symmetric capabilities */
+ vht_cap->cap |= cap_info & own_cap.cap &
+ (IEEE80211_VHT_CAP_SHORT_GI_80 |
+ IEEE80211_VHT_CAP_SHORT_GI_160);
+
+ /* remaining ones */
+ if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) {
+ vht_cap->cap |= cap_info &
+ (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
+ IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX |
+ IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX);
+ }
+
+ if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)
+ vht_cap->cap |= cap_info &
+ IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
+
+ if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)
+ vht_cap->cap |= cap_info &
+ IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+
+ if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)
+ vht_cap->cap |= cap_info &
+ IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE;
+
+ if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC)
+ vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK;
+
+ if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK)
+ vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC;
/* Copy peer MCS info, the driver might need them. */
memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,
sizeof(struct ieee80211_vht_mcs_info));
+ /* but also restrict MCSes */
+ for (i = 0; i < 8; i++) {
+ u16 own_rx, own_tx, peer_rx, peer_tx;
+
+ own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map);
+ own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+ own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map);
+ own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+ peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map);
+ peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+ peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map);
+ peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+ if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+ if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED)
+ peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED;
+ else if (own_rx < peer_tx)
+ peer_tx = own_rx;
+ }
+
+ if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+ if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED)
+ peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED;
+ else if (own_tx < peer_rx)
+ peer_rx = own_tx;
+ }
+
+ vht_cap->vht_mcs.rx_mcs_map &=
+ ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2);
+ vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2);
+
+ vht_cap->vht_mcs.tx_mcs_map &=
+ ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2);
+ vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2);
+ }
+
+ /* finally set up the bandwidth */
switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index a4dcaf1dd4b6..d48422e27110 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -88,9 +88,7 @@ struct mac802154_sub_if_data {
#define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw)
-#define MAC802154_MAX_XMIT_ATTEMPTS 3
-
-#define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */
+#define MAC802154_CHAN_NONE 0xff /* No channel is assigned */
extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced;
extern struct ieee802154_mlme_ops mac802154_mlme_wpan;
@@ -114,5 +112,6 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev);
u16 mac802154_dev_get_pan_id(const struct net_device *dev);
void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
+u8 mac802154_dev_get_dsn(const struct net_device *dev);
#endif /* MAC802154_H */
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index d8d277006089..a99910d4d52f 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -73,4 +73,5 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = {
.start_req = mac802154_mlme_start_req,
.get_pan_id = mac802154_dev_get_pan_id,
.get_short_addr = mac802154_dev_get_short_addr,
+ .get_dsn = mac802154_dev_get_dsn,
};
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index f47781ab0ccc..8ded97cf1c33 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -159,6 +159,15 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
}
}
+u8 mac802154_dev_get_dsn(const struct net_device *dev)
+{
+ struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+ BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+ return priv->dsn++;
+}
+
static void phy_chan_notify(struct work_struct *work)
{
struct phy_chan_notify_work *nw = container_of(work,
@@ -167,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work)
struct mac802154_sub_if_data *priv = netdev_priv(nw->dev);
int res;
+ mutex_lock(&priv->hw->phy->pib_lock);
res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan);
if (res)
pr_debug("set_channel failed\n");
+ else {
+ priv->hw->phy->current_channel = priv->chan;
+ priv->hw->phy->current_page = priv->page;
+ }
+ mutex_unlock(&priv->hw->phy->pib_lock);
kfree(nw);
}
@@ -186,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
priv->chan = chan;
spin_unlock_bh(&priv->mib_lock);
+ mutex_lock(&priv->hw->phy->pib_lock);
if (priv->hw->phy->current_channel != priv->chan ||
priv->hw->phy->current_page != priv->page) {
+ mutex_unlock(&priv->hw->phy->pib_lock);
+
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
@@ -195,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
INIT_WORK(&work->work, phy_chan_notify);
work->dev = dev;
queue_work(priv->hw->dev_workqueue, &work->work);
- }
+ } else
+ mutex_unlock(&priv->hw->phy->pib_lock);
}
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 4e09d070995a..6d1647399d4f 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -25,6 +25,7 @@
#include <linux/if_arp.h>
#include <linux/crc-ccitt.h>
+#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
#include <net/wpan-phy.h>
@@ -39,12 +40,12 @@ struct xmit_work {
struct mac802154_priv *priv;
u8 chan;
u8 page;
- u8 xmit_attempts;
};
static void mac802154_xmit_worker(struct work_struct *work)
{
struct xmit_work *xw = container_of(work, struct xmit_work, work);
+ struct mac802154_sub_if_data *sdata;
int res;
mutex_lock(&xw->priv->phy->pib_lock);
@@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work)
pr_debug("set_channel failed\n");
goto out;
}
+
+ xw->priv->phy->current_channel = xw->chan;
+ xw->priv->phy->current_page = xw->page;
}
res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb);
+ if (res)
+ pr_debug("transmission failed\n");
out:
mutex_unlock(&xw->priv->phy->pib_lock);
- if (res) {
- if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) {
- queue_work(xw->priv->dev_workqueue, &xw->work);
- return;
- } else
- pr_debug("transmission failed for %d times",
- MAC802154_MAX_XMIT_ATTEMPTS);
- }
+ /* Restart the netif queue on each sub_if_data object. */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &xw->priv->slaves, list)
+ netif_wake_queue(sdata->dev);
+ rcu_read_unlock();
dev_kfree_skb(xw->skb);
@@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
u8 page, u8 chan)
{
struct xmit_work *work;
+ struct mac802154_sub_if_data *sdata;
if (!(priv->phy->channels_supported[page] & (1 << chan))) {
WARN_ON(1);
@@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
+ /* Stop the netif queue on each sub_if_data object. */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &priv->slaves, list)
+ netif_stop_queue(sdata->dev);
+ rcu_read_unlock();
+
INIT_WORK(&work->work, mac802154_xmit_worker);
work->skb = skb;
work->priv = priv;
work->page = page;
work->chan = chan;
- work->xmit_attempts = 0;
queue_work(priv->dev_workqueue, &work->work);
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index d20c6d3c247d..2ca2f4dceab7 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -145,6 +145,8 @@ static int mac802154_header_create(struct sk_buff *skb,
head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */
fc = mac_cb_type(skb);
+ if (mac_cb_is_ackreq(skb))
+ fc |= IEEE802154_FC_ACK_REQ;
if (!saddr) {
spin_lock_bh(&priv->mib_lock);
@@ -358,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev)
dev->header_ops = &mac802154_header_ops;
dev->needed_tailroom = 2; /* FCS */
dev->mtu = IEEE802154_MTU;
- dev->tx_queue_len = 10;
+ dev->tx_queue_len = 300;
dev->type = ARPHRD_IEEE802154;
dev->flags = IFF_NOARP | IFF_BROADCAST;
dev->watchdog_timeo = 0;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a9c488b6c50d..857ca9f35177 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,6 +5,7 @@
* way.
*
* Rusty Russell (C)2000 -- This code is GPL.
+ * Patrick McHardy (c) 2006-2012
*/
#include <linux/kernel.h>
#include <linux/netfilter.h>
@@ -29,6 +30,8 @@ static DEFINE_MUTEX(afinfo_mutex);
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
+const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ipv6_ops);
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
@@ -276,10 +279,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
+static int __net_init netfilter_net_init(struct net *net)
+{
#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_net_netfilter;
-EXPORT_SYMBOL(proc_net_netfilter);
+ net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
+ net->proc_net);
+ if (!net->nf.proc_netfilter) {
+ if (!net_eq(net, &init_net))
+ pr_err("cannot create netfilter proc entry");
+
+ return -ENOMEM;
+ }
#endif
+ return 0;
+}
+
+static void __net_exit netfilter_net_exit(struct net *net)
+{
+ remove_proc_entry("netfilter", net->proc_net);
+}
+
+static struct pernet_operations netfilter_net_ops = {
+ .init = netfilter_net_init,
+ .exit = netfilter_net_exit,
+};
void __init netfilter_init(void)
{
@@ -289,11 +312,8 @@ void __init netfilter_init(void)
INIT_LIST_HEAD(&nf_hooks[i][h]);
}
-#ifdef CONFIG_PROC_FS
- proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
- if (!proc_net_netfilter)
+ if (register_pernet_subsys(&netfilter_net_ops) < 0)
panic("cannot create netfilter proc entry");
-#endif
if (netfilter_log_init() < 0)
panic("cannot initialize nf_log");
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
new file mode 100644
index 000000000000..25243379b887
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -0,0 +1,277 @@
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __IP_SET_BITMAP_IP_GEN_H
+#define __IP_SET_BITMAP_IP_GEN_H
+
+#define CONCAT(a, b) a##b
+#define TOKEN(a,b) CONCAT(a, b)
+
+#define mtype_do_test TOKEN(MTYPE, _do_test)
+#define mtype_gc_test TOKEN(MTYPE, _gc_test)
+#define mtype_is_filled TOKEN(MTYPE, _is_filled)
+#define mtype_do_add TOKEN(MTYPE, _do_add)
+#define mtype_do_del TOKEN(MTYPE, _do_del)
+#define mtype_do_list TOKEN(MTYPE, _do_list)
+#define mtype_do_head TOKEN(MTYPE, _do_head)
+#define mtype_adt_elem TOKEN(MTYPE, _adt_elem)
+#define mtype_add_timeout TOKEN(MTYPE, _add_timeout)
+#define mtype_gc_init TOKEN(MTYPE, _gc_init)
+#define mtype_kadt TOKEN(MTYPE, _kadt)
+#define mtype_uadt TOKEN(MTYPE, _uadt)
+#define mtype_destroy TOKEN(MTYPE, _destroy)
+#define mtype_flush TOKEN(MTYPE, _flush)
+#define mtype_head TOKEN(MTYPE, _head)
+#define mtype_same_set TOKEN(MTYPE, _same_set)
+#define mtype_elem TOKEN(MTYPE, _elem)
+#define mtype_test TOKEN(MTYPE, _test)
+#define mtype_add TOKEN(MTYPE, _add)
+#define mtype_del TOKEN(MTYPE, _del)
+#define mtype_list TOKEN(MTYPE, _list)
+#define mtype_gc TOKEN(MTYPE, _gc)
+#define mtype MTYPE
+
+#define ext_timeout(e, m) \
+ (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
+#define ext_counter(e, m) \
+ (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER])
+#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id))
+
+static void
+mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+{
+ struct mtype *map = set->data;
+
+ init_timer(&map->gc);
+ map->gc.data = (unsigned long) set;
+ map->gc.function = gc;
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+ add_timer(&map->gc);
+}
+
+static void
+mtype_destroy(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+
+ ip_set_free(map->members);
+ if (map->dsize)
+ ip_set_free(map->extensions);
+ kfree(map);
+
+ set->data = NULL;
+}
+
+static void
+mtype_flush(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ memset(map->members, 0, map->memsize);
+}
+
+static int
+mtype_head(struct ip_set *set, struct sk_buff *skb)
+{
+ const struct mtype *map = set->data;
+ struct nlattr *nested;
+
+ nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested)
+ goto nla_put_failure;
+ if (mtype_do_head(skb, map) ||
+ nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
+ htonl(sizeof(*map) +
+ map->memsize +
+ map->dsize * map->elements)) ||
+ (SET_WITH_TIMEOUT(set) &&
+ nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
+ (SET_WITH_COUNTER(set) &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
+ htonl(IPSET_FLAG_WITH_COUNTERS))))
+ goto nla_put_failure;
+ ipset_nest_end(skb, nested);
+
+ return 0;
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int
+mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct mtype *map = set->data;
+ const struct mtype_adt_elem *e = value;
+ void *x = get_ext(map, e->id);
+ int ret = mtype_do_test(e, map);
+
+ if (ret <= 0)
+ return ret;
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(x, map)))
+ return 0;
+ if (SET_WITH_COUNTER(set))
+ ip_set_update_counter(ext_counter(x, map), ext, mext, flags);
+ return 1;
+}
+
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct mtype *map = set->data;
+ const struct mtype_adt_elem *e = value;
+ void *x = get_ext(map, e->id);
+ int ret = mtype_do_add(e, map, flags);
+
+ if (ret == IPSET_ADD_FAILED) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(x, map)))
+ ret = 0;
+ else if (!(flags & IPSET_FLAG_EXIST))
+ return -IPSET_ERR_EXIST;
+ }
+
+ if (SET_WITH_TIMEOUT(set))
+#ifdef IP_SET_BITMAP_STORED_TIMEOUT
+ mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret);
+#else
+ ip_set_timeout_set(ext_timeout(x, map), ext->timeout);
+#endif
+
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(x, map), ext);
+ return 0;
+}
+
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct mtype *map = set->data;
+ const struct mtype_adt_elem *e = value;
+ const void *x = get_ext(map, e->id);
+
+ if (mtype_do_del(e, map) ||
+ (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(x, map))))
+ return -IPSET_ERR_EXIST;
+
+ return 0;
+}
+
+static int
+mtype_list(const struct ip_set *set,
+ struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct mtype *map = set->data;
+ struct nlattr *adt, *nested;
+ void *x;
+ u32 id, first = cb->args[2];
+
+ adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
+ if (!adt)
+ return -EMSGSIZE;
+ for (; cb->args[2] < map->elements; cb->args[2]++) {
+ id = cb->args[2];
+ x = get_ext(map, id);
+ if (!test_bit(id, map->members) ||
+ (SET_WITH_TIMEOUT(set) &&
+#ifdef IP_SET_BITMAP_STORED_TIMEOUT
+ mtype_is_filled((const struct mtype_elem *) x) &&
+#endif
+ ip_set_timeout_expired(ext_timeout(x, map))))
+ continue;
+ nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested) {
+ if (id == first) {
+ nla_nest_cancel(skb, adt);
+ return -EMSGSIZE;
+ } else
+ goto nla_put_failure;
+ }
+ if (mtype_do_list(skb, map, id))
+ goto nla_put_failure;
+ if (SET_WITH_TIMEOUT(set)) {
+#ifdef IP_SET_BITMAP_STORED_TIMEOUT
+ if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(ip_set_timeout_stored(map, id,
+ ext_timeout(x, map)))))
+ goto nla_put_failure;
+#else
+ if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(ip_set_timeout_get(
+ ext_timeout(x, map)))))
+ goto nla_put_failure;
+#endif
+ }
+ if (SET_WITH_COUNTER(set) &&
+ ip_set_put_counter(skb, ext_counter(x, map)))
+ goto nla_put_failure;
+ ipset_nest_end(skb, nested);
+ }
+ ipset_nest_end(skb, adt);
+
+ /* Set listing finished */
+ cb->args[2] = 0;
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nested);
+ ipset_nest_end(skb, adt);
+ if (unlikely(id == first)) {
+ cb->args[2] = 0;
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
+static void
+mtype_gc(unsigned long ul_set)
+{
+ struct ip_set *set = (struct ip_set *) ul_set;
+ struct mtype *map = set->data;
+ const void *x;
+ u32 id;
+
+ /* We run parallel with other readers (test element)
+ * but adding/deleting new entries is locked out */
+ read_lock_bh(&set->lock);
+ for (id = 0; id < map->elements; id++)
+ if (mtype_gc_test(id, map)) {
+ x = get_ext(map, id);
+ if (ip_set_timeout_expired(ext_timeout(x, map)))
+ clear_bit(id, map->members);
+ }
+ read_unlock_bh(&set->lock);
+
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+ add_timer(&map->gc);
+}
+
+static const struct ip_set_type_variant mtype = {
+ .kadt = mtype_kadt,
+ .uadt = mtype_uadt,
+ .adt = {
+ [IPSET_ADD] = mtype_add,
+ [IPSET_DEL] = mtype_del,
+ [IPSET_TEST] = mtype_test,
+ },
+ .destroy = mtype_destroy,
+ .flush = mtype_flush,
+ .head = mtype_head,
+ .list = mtype_list,
+ .same_set = mtype_same_set,
+};
+
+#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4a92fd47bd4c..f1a8128bef01 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -1,6 +1,6 @@
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -24,31 +24,37 @@
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_bitmap.h>
-#define IP_SET_BITMAP_TIMEOUT
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#define REVISION_MIN 0
-#define REVISION_MAX 0
+#define REVISION_MAX 1 /* Counter support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_bitmap:ip");
+#define MTYPE bitmap_ip
+
/* Type structure */
struct bitmap_ip {
void *members; /* the set members */
+ void *extensions; /* data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
u32 hosts; /* number of hosts in a subnet */
size_t memsize; /* members size */
+ size_t dsize; /* extensions struct size */
+ size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
u8 netmask; /* subnet netmask */
u32 timeout; /* timeout parameter */
struct timer_list gc; /* garbage collection */
};
-/* Base variant */
+/* ADT structure for generic function args */
+struct bitmap_ip_adt_elem {
+ u16 id;
+};
static inline u32
ip_to_id(const struct bitmap_ip *m, u32 ip)
@@ -56,188 +62,67 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
}
-static int
-bitmap_ip_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
-{
- const struct bitmap_ip *map = set->data;
- u16 id = *(u16 *)value;
-
- return !!test_bit(id, map->members);
-}
+/* Common functions */
-static int
-bitmap_ip_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
{
- struct bitmap_ip *map = set->data;
- u16 id = *(u16 *)value;
-
- if (test_and_set_bit(id, map->members))
- return -IPSET_ERR_EXIST;
-
- return 0;
+ return !!test_bit(e->id, map->members);
}
-static int
-bitmap_ip_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map)
{
- struct bitmap_ip *map = set->data;
- u16 id = *(u16 *)value;
-
- if (!test_and_clear_bit(id, map->members))
- return -IPSET_ERR_EXIST;
-
- return 0;
-}
-
-static int
-bitmap_ip_list(const struct ip_set *set,
- struct sk_buff *skb, struct netlink_callback *cb)
-{
- const struct bitmap_ip *map = set->data;
- struct nlattr *atd, *nested;
- u32 id, first = cb->args[2];
-
- atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
- if (!atd)
- return -EMSGSIZE;
- for (; cb->args[2] < map->elements; cb->args[2]++) {
- id = cb->args[2];
- if (!test_bit(id, map->members))
- continue;
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested) {
- if (id == first) {
- nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
- }
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
- htonl(map->first_ip + id * map->hosts)))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
- }
- ipset_nest_end(skb, atd);
- /* Set listing finished */
- cb->args[2] = 0;
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, atd);
- if (unlikely(id == first)) {
- cb->args[2] = 0;
- return -EMSGSIZE;
- }
- return 0;
+ return !!test_bit(id, map->members);
}
-/* Timeout variant */
-
-static int
-bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
+ u32 flags)
{
- const struct bitmap_ip *map = set->data;
- const unsigned long *members = map->members;
- u16 id = *(u16 *)value;
-
- return ip_set_timeout_test(members[id]);
+ return !!test_and_set_bit(e->id, map->members);
}
-static int
-bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
{
- struct bitmap_ip *map = set->data;
- unsigned long *members = map->members;
- u16 id = *(u16 *)value;
-
- if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST))
- return -IPSET_ERR_EXIST;
-
- members[id] = ip_set_timeout_set(timeout);
-
- return 0;
+ return !test_and_clear_bit(e->id, map->members);
}
-static int
-bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id)
{
- struct bitmap_ip *map = set->data;
- unsigned long *members = map->members;
- u16 id = *(u16 *)value;
- int ret = -IPSET_ERR_EXIST;
-
- if (ip_set_timeout_test(members[id]))
- ret = 0;
-
- members[id] = IPSET_ELEM_UNSET;
- return ret;
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
+ htonl(map->first_ip + id * map->hosts));
}
-static int
-bitmap_ip_tlist(const struct ip_set *set,
- struct sk_buff *skb, struct netlink_callback *cb)
+static inline int
+bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
{
- const struct bitmap_ip *map = set->data;
- struct nlattr *adt, *nested;
- u32 id, first = cb->args[2];
- const unsigned long *members = map->members;
-
- adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
- if (!adt)
- return -EMSGSIZE;
- for (; cb->args[2] < map->elements; cb->args[2]++) {
- id = cb->args[2];
- if (!ip_set_timeout_test(members[id]))
- continue;
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested) {
- if (id == first) {
- nla_nest_cancel(skb, adt);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
- }
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
- htonl(map->first_ip + id * map->hosts)) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(members[id]))))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
- }
- ipset_nest_end(skb, adt);
-
- /* Set listing finished */
- cb->args[2] = 0;
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, adt);
- if (unlikely(id == first)) {
- cb->args[2] = 0;
- return -EMSGSIZE;
- }
- return 0;
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) ||
+ (map->netmask != 32 &&
+ nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask));
}
static int
bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
+ struct bitmap_ip_adt_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
u32 ip;
ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
if (ip < map->first_ip || ip > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
- ip = ip_to_id(map, ip);
+ e.id = ip_to_id(map, ip);
- return adtfn(set, &ip, opt_timeout(opt, map), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
@@ -246,33 +131,31 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- u32 timeout = map->timeout;
- u32 ip, ip_to, id;
+ u32 ip, ip_to;
+ struct bitmap_ip_adt_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (ip < map->first_ip || ip > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(map->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
if (adt == IPSET_TEST) {
- id = ip_to_id(map, ip);
- return adtfn(set, &id, timeout, flags);
+ e.id = ip_to_id(map, ip);
+ return adtfn(set, &e, &ext, &ext, flags);
}
if (tb[IPSET_ATTR_IP_TO]) {
@@ -297,8 +180,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
for (; !before(ip_to, ip); ip += map->hosts) {
- id = ip_to_id(map, ip);
- ret = adtfn(set, &id, timeout, flags);
+ e.id = ip_to_id(map, ip);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -308,54 +191,6 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static void
-bitmap_ip_destroy(struct ip_set *set)
-{
- struct bitmap_ip *map = set->data;
-
- if (with_timeout(map->timeout))
- del_timer_sync(&map->gc);
-
- ip_set_free(map->members);
- kfree(map);
-
- set->data = NULL;
-}
-
-static void
-bitmap_ip_flush(struct ip_set *set)
-{
- struct bitmap_ip *map = set->data;
-
- memset(map->members, 0, map->memsize);
-}
-
-static int
-bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
-{
- const struct bitmap_ip *map = set->data;
- struct nlattr *nested;
-
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested)
- goto nla_put_failure;
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
- nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) ||
- (map->netmask != 32 &&
- nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask)) ||
- nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) + map->memsize)) ||
- (with_timeout(map->timeout) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
-
- return 0;
-nla_put_failure:
- return -EMSGSIZE;
-}
-
static bool
bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
{
@@ -365,70 +200,35 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
return x->first_ip == y->first_ip &&
x->last_ip == y->last_ip &&
x->netmask == y->netmask &&
- x->timeout == y->timeout;
+ x->timeout == y->timeout &&
+ a->extensions == b->extensions;
}
-static const struct ip_set_type_variant bitmap_ip = {
- .kadt = bitmap_ip_kadt,
- .uadt = bitmap_ip_uadt,
- .adt = {
- [IPSET_ADD] = bitmap_ip_add,
- [IPSET_DEL] = bitmap_ip_del,
- [IPSET_TEST] = bitmap_ip_test,
- },
- .destroy = bitmap_ip_destroy,
- .flush = bitmap_ip_flush,
- .head = bitmap_ip_head,
- .list = bitmap_ip_list,
- .same_set = bitmap_ip_same_set,
+/* Plain variant */
+
+struct bitmap_ip_elem {
};
-static const struct ip_set_type_variant bitmap_tip = {
- .kadt = bitmap_ip_kadt,
- .uadt = bitmap_ip_uadt,
- .adt = {
- [IPSET_ADD] = bitmap_ip_tadd,
- [IPSET_DEL] = bitmap_ip_tdel,
- [IPSET_TEST] = bitmap_ip_ttest,
- },
- .destroy = bitmap_ip_destroy,
- .flush = bitmap_ip_flush,
- .head = bitmap_ip_head,
- .list = bitmap_ip_tlist,
- .same_set = bitmap_ip_same_set,
+/* Timeout variant */
+
+struct bitmap_ipt_elem {
+ unsigned long timeout;
};
-static void
-bitmap_ip_gc(unsigned long ul_set)
-{
- struct ip_set *set = (struct ip_set *) ul_set;
- struct bitmap_ip *map = set->data;
- unsigned long *table = map->members;
- u32 id;
-
- /* We run parallel with other readers (test element)
- * but adding/deleting new entries is locked out */
- read_lock_bh(&set->lock);
- for (id = 0; id < map->elements; id++)
- if (ip_set_timeout_expired(table[id]))
- table[id] = IPSET_ELEM_UNSET;
- read_unlock_bh(&set->lock);
-
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
- add_timer(&map->gc);
-}
+/* Plain variant with counter */
-static void
-bitmap_ip_gc_init(struct ip_set *set)
-{
- struct bitmap_ip *map = set->data;
+struct bitmap_ipc_elem {
+ struct ip_set_counter counter;
+};
- init_timer(&map->gc);
- map->gc.data = (unsigned long) set;
- map->gc.function = bitmap_ip_gc;
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
- add_timer(&map->gc);
-}
+/* Timeout variant with counter */
+
+struct bitmap_ipct_elem {
+ unsigned long timeout;
+ struct ip_set_counter counter;
+};
+
+#include "ip_set_bitmap_gen.h"
/* Create bitmap:ip type of sets */
@@ -440,6 +240,13 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
+ if (map->dsize) {
+ map->extensions = ip_set_alloc(map->dsize * elements);
+ if (!map->extensions) {
+ kfree(map->members);
+ return false;
+ }
+ }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -457,13 +264,14 @@ static int
bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
{
struct bitmap_ip *map;
- u32 first_ip, last_ip, hosts;
+ u32 first_ip, last_ip, hosts, cadt_flags = 0;
u64 elements;
u8 netmask = 32;
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
@@ -526,8 +334,45 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
if (!map)
return -ENOMEM;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- map->memsize = elements * sizeof(unsigned long);
+ map->memsize = bitmap_bytes(0, elements - 1);
+ set->variant = &bitmap_ip;
+ if (tb[IPSET_ATTR_CADT_FLAGS])
+ cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
+ set->extensions |= IPSET_EXT_COUNTER;
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ map->dsize = sizeof(struct bitmap_ipct_elem);
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct bitmap_ipct_elem, timeout);
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct bitmap_ipct_elem, counter);
+
+ if (!init_map_ip(set, map, first_ip, last_ip,
+ elements, hosts, netmask)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+
+ map->timeout = ip_set_timeout_uget(
+ tb[IPSET_ATTR_TIMEOUT]);
+ set->extensions |= IPSET_EXT_TIMEOUT;
+
+ bitmap_ip_gc_init(set, bitmap_ip_gc);
+ } else {
+ map->dsize = sizeof(struct bitmap_ipc_elem);
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct bitmap_ipc_elem, counter);
+
+ if (!init_map_ip(set, map, first_ip, last_ip,
+ elements, hosts, netmask)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+ }
+ } else if (tb[IPSET_ATTR_TIMEOUT]) {
+ map->dsize = sizeof(struct bitmap_ipt_elem);
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct bitmap_ipt_elem, timeout);
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
@@ -536,19 +381,16 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
}
map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->variant = &bitmap_tip;
+ set->extensions |= IPSET_EXT_TIMEOUT;
- bitmap_ip_gc_init(set);
+ bitmap_ip_gc_init(set, bitmap_ip_gc);
} else {
- map->memsize = bitmap_bytes(0, elements - 1);
-
+ map->dsize = 0;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
kfree(map);
return -ENOMEM;
}
-
- set->variant = &bitmap_ip;
}
return 0;
}
@@ -568,6 +410,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -575,6 +418,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index d7df6ac2c6f1..3b30e0bef890 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -1,7 +1,7 @@
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
* Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -23,344 +23,208 @@
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_bitmap.h>
#define REVISION_MIN 0
-#define REVISION_MAX 0
+#define REVISION_MAX 1 /* Counter support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_bitmap:ip,mac");
+#define MTYPE bitmap_ipmac
+#define IP_SET_BITMAP_STORED_TIMEOUT
+
enum {
- MAC_EMPTY, /* element is not set */
- MAC_FILLED, /* element is set with MAC */
MAC_UNSET, /* element is set, without MAC */
+ MAC_FILLED, /* element is set with MAC */
};
/* Type structure */
struct bitmap_ipmac {
void *members; /* the set members */
+ void *extensions; /* MAC + data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
+ u32 elements; /* number of max elements in the set */
u32 timeout; /* timeout value */
struct timer_list gc; /* garbage collector */
+ size_t memsize; /* members size */
size_t dsize; /* size of element */
+ size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
};
/* ADT structure for generic function args */
-struct ipmac {
- u32 id; /* id in array */
- unsigned char *ether; /* ethernet address */
+struct bitmap_ipmac_adt_elem {
+ u16 id;
+ unsigned char *ether;
};
-/* Member element without and with timeout */
-
-struct ipmac_elem {
+struct bitmap_ipmac_elem {
unsigned char ether[ETH_ALEN];
- unsigned char match;
+ unsigned char filled;
} __attribute__ ((aligned));
-struct ipmac_telem {
- unsigned char ether[ETH_ALEN];
- unsigned char match;
- unsigned long timeout;
-} __attribute__ ((aligned));
-
-static inline void *
-bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id)
+static inline u32
+ip_to_id(const struct bitmap_ipmac *m, u32 ip)
{
- return (void *)((char *)map->members + id * map->dsize);
+ return ip - m->first_ip;
}
-static inline bool
-bitmap_timeout(const struct bitmap_ipmac *map, u32 id)
+static inline struct bitmap_ipmac_elem *
+get_elem(void *extensions, u16 id, size_t dsize)
{
- const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
-
- return ip_set_timeout_test(elem->timeout);
+ return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
}
-static inline bool
-bitmap_expired(const struct bitmap_ipmac *map, u32 id)
-{
- const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
-
- return ip_set_timeout_expired(elem->timeout);
-}
+/* Common functions */
static inline int
-bitmap_ipmac_exist(const struct ipmac_telem *elem)
-{
- return elem->match == MAC_UNSET ||
- (elem->match == MAC_FILLED &&
- !ip_set_timeout_expired(elem->timeout));
-}
-
-/* Base variant */
-
-static int
-bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
-{
- const struct bitmap_ipmac *map = set->data;
- const struct ipmac *data = value;
- const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
-
- switch (elem->match) {
- case MAC_UNSET:
- /* Trigger kernel to fill out the ethernet address */
- return -EAGAIN;
- case MAC_FILLED:
- return data->ether == NULL ||
- ether_addr_equal(data->ether, elem->ether);
- }
- return 0;
-}
-
-static int
-bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
-{
- struct bitmap_ipmac *map = set->data;
- const struct ipmac *data = value;
- struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
-
- switch (elem->match) {
- case MAC_UNSET:
- if (!data->ether)
- /* Already added without ethernet address */
- return -IPSET_ERR_EXIST;
- /* Fill the MAC address */
- memcpy(elem->ether, data->ether, ETH_ALEN);
- elem->match = MAC_FILLED;
- break;
- case MAC_FILLED:
- return -IPSET_ERR_EXIST;
- case MAC_EMPTY:
- if (data->ether) {
- memcpy(elem->ether, data->ether, ETH_ALEN);
- elem->match = MAC_FILLED;
- } else
- elem->match = MAC_UNSET;
- }
-
- return 0;
-}
-
-static int
-bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
+bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
+ const struct bitmap_ipmac *map)
{
- struct bitmap_ipmac *map = set->data;
- const struct ipmac *data = value;
- struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
-
- if (elem->match == MAC_EMPTY)
- return -IPSET_ERR_EXIST;
+ const struct bitmap_ipmac_elem *elem;
- elem->match = MAC_EMPTY;
-
- return 0;
+ if (!test_bit(e->id, map->members))
+ return 0;
+ elem = get_elem(map->extensions, e->id, map->dsize);
+ if (elem->filled == MAC_FILLED)
+ return e->ether == NULL ||
+ ether_addr_equal(e->ether, elem->ether);
+ /* Trigger kernel to fill out the ethernet address */
+ return -EAGAIN;
}
-static int
-bitmap_ipmac_list(const struct ip_set *set,
- struct sk_buff *skb, struct netlink_callback *cb)
+static inline int
+bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map)
{
- const struct bitmap_ipmac *map = set->data;
- const struct ipmac_elem *elem;
- struct nlattr *atd, *nested;
- u32 id, first = cb->args[2];
- u32 last = map->last_ip - map->first_ip;
-
- atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
- if (!atd)
- return -EMSGSIZE;
- for (; cb->args[2] <= last; cb->args[2]++) {
- id = cb->args[2];
- elem = bitmap_ipmac_elem(map, id);
- if (elem->match == MAC_EMPTY)
- continue;
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested) {
- if (id == first) {
- nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
- }
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
- htonl(map->first_ip + id)) ||
- (elem->match == MAC_FILLED &&
- nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN,
- elem->ether)))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
- }
- ipset_nest_end(skb, atd);
- /* Set listing finished */
- cb->args[2] = 0;
-
- return 0;
+ const struct bitmap_ipmac_elem *elem;
-nla_put_failure:
- nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, atd);
- if (unlikely(id == first)) {
- cb->args[2] = 0;
- return -EMSGSIZE;
- }
- return 0;
+ if (!test_bit(id, map->members))
+ return 0;
+ elem = get_elem(map->extensions, id, map->dsize);
+ /* Timer not started for the incomplete elements */
+ return elem->filled == MAC_FILLED;
}
-/* Timeout variant */
-
-static int
-bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
{
- const struct bitmap_ipmac *map = set->data;
- const struct ipmac *data = value;
- const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
-
- switch (elem->match) {
- case MAC_UNSET:
- /* Trigger kernel to fill out the ethernet address */
- return -EAGAIN;
- case MAC_FILLED:
- return (data->ether == NULL ||
- ether_addr_equal(data->ether, elem->ether)) &&
- !bitmap_expired(map, data->id);
- }
- return 0;
+ return elem->filled == MAC_FILLED;
}
-static int
-bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ipmac_add_timeout(unsigned long *timeout,
+ const struct bitmap_ipmac_adt_elem *e,
+ const struct ip_set_ext *ext,
+ struct bitmap_ipmac *map, int mode)
{
- struct bitmap_ipmac *map = set->data;
- const struct ipmac *data = value;
- struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
- bool flag_exist = flags & IPSET_FLAG_EXIST;
+ u32 t = ext->timeout;
- switch (elem->match) {
- case MAC_UNSET:
- if (!(data->ether || flag_exist))
- /* Already added without ethernet address */
- return -IPSET_ERR_EXIST;
- /* Fill the MAC address and activate the timer */
- memcpy(elem->ether, data->ether, ETH_ALEN);
- elem->match = MAC_FILLED;
- if (timeout == map->timeout)
+ if (mode == IPSET_ADD_START_STORED_TIMEOUT) {
+ if (t == map->timeout)
/* Timeout was not specified, get stored one */
- timeout = elem->timeout;
- elem->timeout = ip_set_timeout_set(timeout);
- break;
- case MAC_FILLED:
- if (!(bitmap_expired(map, data->id) || flag_exist))
- return -IPSET_ERR_EXIST;
- /* Fall through */
- case MAC_EMPTY:
- if (data->ether) {
- memcpy(elem->ether, data->ether, ETH_ALEN);
- elem->match = MAC_FILLED;
- } else
- elem->match = MAC_UNSET;
+ t = *timeout;
+ ip_set_timeout_set(timeout, t);
+ } else {
/* If MAC is unset yet, we store plain timeout value
* because the timer is not activated yet
* and we can reuse it later when MAC is filled out,
* possibly by the kernel */
- elem->timeout = data->ether ? ip_set_timeout_set(timeout)
- : timeout;
- break;
+ if (e->ether)
+ ip_set_timeout_set(timeout, t);
+ else
+ *timeout = t;
}
-
return 0;
}
-static int
-bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
+ struct bitmap_ipmac *map, u32 flags)
{
- struct bitmap_ipmac *map = set->data;
- const struct ipmac *data = value;
- struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
+ struct bitmap_ipmac_elem *elem;
+
+ elem = get_elem(map->extensions, e->id, map->dsize);
+ if (test_and_set_bit(e->id, map->members)) {
+ if (elem->filled == MAC_FILLED) {
+ if (e->ether && (flags & IPSET_FLAG_EXIST))
+ memcpy(elem->ether, e->ether, ETH_ALEN);
+ return IPSET_ADD_FAILED;
+ } else if (!e->ether)
+ /* Already added without ethernet address */
+ return IPSET_ADD_FAILED;
+ /* Fill the MAC address and trigger the timer activation */
+ memcpy(elem->ether, e->ether, ETH_ALEN);
+ elem->filled = MAC_FILLED;
+ return IPSET_ADD_START_STORED_TIMEOUT;
+ } else if (e->ether) {
+ /* We can store MAC too */
+ memcpy(elem->ether, e->ether, ETH_ALEN);
+ elem->filled = MAC_FILLED;
+ return 0;
+ } else {
+ elem->filled = MAC_UNSET;
+ /* MAC is not stored yet, don't start timer */
+ return IPSET_ADD_STORE_PLAIN_TIMEOUT;
+ }
+}
- if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id))
- return -IPSET_ERR_EXIST;
+static inline int
+bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
+ struct bitmap_ipmac *map)
+{
+ return !test_and_clear_bit(e->id, map->members);
+}
- elem->match = MAC_EMPTY;
+static inline unsigned long
+ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
+{
+ const struct bitmap_ipmac_elem *elem =
+ get_elem(map->extensions, id, map->dsize);
- return 0;
+ return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
+ *timeout;
}
-static int
-bitmap_ipmac_tlist(const struct ip_set *set,
- struct sk_buff *skb, struct netlink_callback *cb)
+static inline int
+bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
+ u32 id)
{
- const struct bitmap_ipmac *map = set->data;
- const struct ipmac_telem *elem;
- struct nlattr *atd, *nested;
- u32 id, first = cb->args[2];
- u32 timeout, last = map->last_ip - map->first_ip;
-
- atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
- if (!atd)
- return -EMSGSIZE;
- for (; cb->args[2] <= last; cb->args[2]++) {
- id = cb->args[2];
- elem = bitmap_ipmac_elem(map, id);
- if (!bitmap_ipmac_exist(elem))
- continue;
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested) {
- if (id == first) {
- nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
- }
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
- htonl(map->first_ip + id)) ||
- (elem->match == MAC_FILLED &&
- nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN,
- elem->ether)))
- goto nla_put_failure;
- timeout = elem->match == MAC_UNSET ? elem->timeout
- : ip_set_timeout_get(elem->timeout);
- if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout)))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
- }
- ipset_nest_end(skb, atd);
- /* Set listing finished */
- cb->args[2] = 0;
+ const struct bitmap_ipmac_elem *elem =
+ get_elem(map->extensions, id, map->dsize);
- return 0;
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
+ htonl(map->first_ip + id)) ||
+ (elem->filled == MAC_FILLED &&
+ nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
+}
-nla_put_failure:
- nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, atd);
- if (unlikely(id == first)) {
- cb->args[2] = 0;
- return -EMSGSIZE;
- }
- return 0;
+static inline int
+bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
+{
+ return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
}
static int
bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct ipmac data;
+ struct bitmap_ipmac_adt_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+ u32 ip;
/* MAC can be src only */
if (!(opt->flags & IPSET_DIM_TWO_SRC))
return 0;
- data.id = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
- if (data.id < map->first_ip || data.id > map->last_ip)
+ ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
+ if (ip < map->first_ip || ip > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
/* Backward compatibility: we don't check the second flag */
@@ -368,10 +232,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- data.id -= map->first_ip;
- data.ether = eth_hdr(skb)->h_source;
+ e.id = ip_to_id(map, ip);
+ e.ether = eth_hdr(skb)->h_source;
- return adtfn(set, &data, opt_timeout(opt, map), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
@@ -380,91 +244,39 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct ipmac data;
- u32 timeout = map->timeout;
+ struct bitmap_ipmac_adt_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+ u32 ip;
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- if (data.id < map->first_ip || data.id > map->last_ip)
+ if (ip < map->first_ip || ip > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
+ e.id = ip_to_id(map, ip);
if (tb[IPSET_ATTR_ETHER])
- data.ether = nla_data(tb[IPSET_ATTR_ETHER]);
+ e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
else
- data.ether = NULL;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(map->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
- data.id -= map->first_ip;
+ e.ether = NULL;
- ret = adtfn(set, &data, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
-static void
-bitmap_ipmac_destroy(struct ip_set *set)
-{
- struct bitmap_ipmac *map = set->data;
-
- if (with_timeout(map->timeout))
- del_timer_sync(&map->gc);
-
- ip_set_free(map->members);
- kfree(map);
-
- set->data = NULL;
-}
-
-static void
-bitmap_ipmac_flush(struct ip_set *set)
-{
- struct bitmap_ipmac *map = set->data;
-
- memset(map->members, 0,
- (map->last_ip - map->first_ip + 1) * map->dsize);
-}
-
-static int
-bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
-{
- const struct bitmap_ipmac *map = set->data;
- struct nlattr *nested;
-
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested)
- goto nla_put_failure;
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
- nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) ||
- nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) +
- ((map->last_ip - map->first_ip + 1) *
- map->dsize))) ||
- (with_timeout(map->timeout) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
-
- return 0;
-nla_put_failure:
- return -EMSGSIZE;
-}
-
static bool
bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
{
@@ -473,85 +285,64 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
return x->first_ip == y->first_ip &&
x->last_ip == y->last_ip &&
- x->timeout == y->timeout;
+ x->timeout == y->timeout &&
+ a->extensions == b->extensions;
}
-static const struct ip_set_type_variant bitmap_ipmac = {
- .kadt = bitmap_ipmac_kadt,
- .uadt = bitmap_ipmac_uadt,
- .adt = {
- [IPSET_ADD] = bitmap_ipmac_add,
- [IPSET_DEL] = bitmap_ipmac_del,
- [IPSET_TEST] = bitmap_ipmac_test,
- },
- .destroy = bitmap_ipmac_destroy,
- .flush = bitmap_ipmac_flush,
- .head = bitmap_ipmac_head,
- .list = bitmap_ipmac_list,
- .same_set = bitmap_ipmac_same_set,
-};
+/* Plain variant */
-static const struct ip_set_type_variant bitmap_tipmac = {
- .kadt = bitmap_ipmac_kadt,
- .uadt = bitmap_ipmac_uadt,
- .adt = {
- [IPSET_ADD] = bitmap_ipmac_tadd,
- [IPSET_DEL] = bitmap_ipmac_tdel,
- [IPSET_TEST] = bitmap_ipmac_ttest,
- },
- .destroy = bitmap_ipmac_destroy,
- .flush = bitmap_ipmac_flush,
- .head = bitmap_ipmac_head,
- .list = bitmap_ipmac_tlist,
- .same_set = bitmap_ipmac_same_set,
+/* Timeout variant */
+
+struct bitmap_ipmact_elem {
+ struct {
+ unsigned char ether[ETH_ALEN];
+ unsigned char filled;
+ } __attribute__ ((aligned));
+ unsigned long timeout;
};
-static void
-bitmap_ipmac_gc(unsigned long ul_set)
-{
- struct ip_set *set = (struct ip_set *) ul_set;
- struct bitmap_ipmac *map = set->data;
- struct ipmac_telem *elem;
- u32 id, last = map->last_ip - map->first_ip;
-
- /* We run parallel with other readers (test element)
- * but adding/deleting new entries is locked out */
- read_lock_bh(&set->lock);
- for (id = 0; id <= last; id++) {
- elem = bitmap_ipmac_elem(map, id);
- if (elem->match == MAC_FILLED &&
- ip_set_timeout_expired(elem->timeout))
- elem->match = MAC_EMPTY;
- }
- read_unlock_bh(&set->lock);
+/* Plain variant with counter */
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
- add_timer(&map->gc);
-}
+struct bitmap_ipmacc_elem {
+ struct {
+ unsigned char ether[ETH_ALEN];
+ unsigned char filled;
+ } __attribute__ ((aligned));
+ struct ip_set_counter counter;
+};
-static void
-bitmap_ipmac_gc_init(struct ip_set *set)
-{
- struct bitmap_ipmac *map = set->data;
+/* Timeout variant with counter */
- init_timer(&map->gc);
- map->gc.data = (unsigned long) set;
- map->gc.function = bitmap_ipmac_gc;
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
- add_timer(&map->gc);
-}
+struct bitmap_ipmacct_elem {
+ struct {
+ unsigned char ether[ETH_ALEN];
+ unsigned char filled;
+ } __attribute__ ((aligned));
+ unsigned long timeout;
+ struct ip_set_counter counter;
+};
+
+#include "ip_set_bitmap_gen.h"
/* Create bitmap:ip,mac type of sets */
static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
- u32 first_ip, u32 last_ip)
+ u32 first_ip, u32 last_ip, u32 elements)
{
map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
if (!map->members)
return false;
+ if (map->dsize) {
+ map->extensions = ip_set_alloc(map->dsize * elements);
+ if (!map->extensions) {
+ kfree(map->members);
+ return false;
+ }
+ }
map->first_ip = first_ip;
map->last_ip = last_ip;
+ map->elements = elements;
map->timeout = IPSET_NO_TIMEOUT;
set->data = map;
@@ -564,13 +355,14 @@ static int
bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
u32 flags)
{
- u32 first_ip, last_ip;
+ u32 first_ip, last_ip, cadt_flags = 0;
u64 elements;
struct bitmap_ipmac *map;
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
@@ -605,28 +397,59 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- map->dsize = sizeof(struct ipmac_telem);
+ map->memsize = bitmap_bytes(0, elements - 1);
+ set->variant = &bitmap_ipmac;
+ if (tb[IPSET_ATTR_CADT_FLAGS])
+ cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
+ set->extensions |= IPSET_EXT_COUNTER;
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ map->dsize = sizeof(struct bitmap_ipmacct_elem);
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct bitmap_ipmacct_elem, timeout);
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct bitmap_ipmacct_elem, counter);
+
+ if (!init_map_ipmac(set, map, first_ip, last_ip,
+ elements)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+ map->timeout = ip_set_timeout_uget(
+ tb[IPSET_ATTR_TIMEOUT]);
+ set->extensions |= IPSET_EXT_TIMEOUT;
+ bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
+ } else {
+ map->dsize = sizeof(struct bitmap_ipmacc_elem);
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct bitmap_ipmacc_elem, counter);
+
+ if (!init_map_ipmac(set, map, first_ip, last_ip,
+ elements)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+ }
+ } else if (tb[IPSET_ATTR_TIMEOUT]) {
+ map->dsize = sizeof(struct bitmap_ipmact_elem);
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct bitmap_ipmact_elem, timeout);
- if (!init_map_ipmac(set, map, first_ip, last_ip)) {
+ if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
return -ENOMEM;
}
-
map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = &bitmap_tipmac;
-
- bitmap_ipmac_gc_init(set);
+ set->extensions |= IPSET_EXT_TIMEOUT;
+ bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
} else {
- map->dsize = sizeof(struct ipmac_elem);
+ map->dsize = sizeof(struct bitmap_ipmac_elem);
- if (!init_map_ipmac(set, map, first_ip, last_ip)) {
+ if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
return -ENOMEM;
}
set->variant = &bitmap_ipmac;
-
}
return 0;
}
@@ -645,6 +468,7 @@ static struct ip_set_type bitmap_ipmac_type = {
[IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -652,6 +476,8 @@ static struct ip_set_type bitmap_ipmac_type = {
.len = ETH_ALEN },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index e6b2db76f4c3..8207d1fda528 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -19,205 +19,94 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_bitmap.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
-#define IP_SET_BITMAP_TIMEOUT
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#define REVISION_MIN 0
-#define REVISION_MAX 0
+#define REVISION_MAX 1 /* Counter support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_bitmap:port");
+#define MTYPE bitmap_port
+
/* Type structure */
struct bitmap_port {
void *members; /* the set members */
+ void *extensions; /* data extensions */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
+ u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
+ size_t dsize; /* extensions struct size */
+ size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
u32 timeout; /* timeout parameter */
struct timer_list gc; /* garbage collection */
};
-/* Base variant */
+/* ADT structure for generic function args */
+struct bitmap_port_adt_elem {
+ u16 id;
+};
-static int
-bitmap_port_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline u16
+port_to_id(const struct bitmap_port *m, u16 port)
{
- const struct bitmap_port *map = set->data;
- u16 id = *(u16 *)value;
-
- return !!test_bit(id, map->members);
+ return port - m->first_port;
}
-static int
-bitmap_port_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
-{
- struct bitmap_port *map = set->data;
- u16 id = *(u16 *)value;
-
- if (test_and_set_bit(id, map->members))
- return -IPSET_ERR_EXIST;
-
- return 0;
-}
+/* Common functions */
-static int
-bitmap_port_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
+ const struct bitmap_port *map)
{
- struct bitmap_port *map = set->data;
- u16 id = *(u16 *)value;
-
- if (!test_and_clear_bit(id, map->members))
- return -IPSET_ERR_EXIST;
-
- return 0;
+ return !!test_bit(e->id, map->members);
}
-static int
-bitmap_port_list(const struct ip_set *set,
- struct sk_buff *skb, struct netlink_callback *cb)
+static inline int
+bitmap_port_gc_test(u16 id, const struct bitmap_port *map)
{
- const struct bitmap_port *map = set->data;
- struct nlattr *atd, *nested;
- u16 id, first = cb->args[2];
- u16 last = map->last_port - map->first_port;
-
- atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
- if (!atd)
- return -EMSGSIZE;
- for (; cb->args[2] <= last; cb->args[2]++) {
- id = cb->args[2];
- if (!test_bit(id, map->members))
- continue;
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested) {
- if (id == first) {
- nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
- }
- if (nla_put_net16(skb, IPSET_ATTR_PORT,
- htons(map->first_port + id)))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
- }
- ipset_nest_end(skb, atd);
- /* Set listing finished */
- cb->args[2] = 0;
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, atd);
- if (unlikely(id == first)) {
- cb->args[2] = 0;
- return -EMSGSIZE;
- }
- return 0;
+ return !!test_bit(id, map->members);
}
-/* Timeout variant */
-
-static int
-bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
+ struct bitmap_port *map, u32 flags)
{
- const struct bitmap_port *map = set->data;
- const unsigned long *members = map->members;
- u16 id = *(u16 *)value;
-
- return ip_set_timeout_test(members[id]);
+ return !!test_and_set_bit(e->id, map->members);
}
-static int
-bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
+ struct bitmap_port *map)
{
- struct bitmap_port *map = set->data;
- unsigned long *members = map->members;
- u16 id = *(u16 *)value;
-
- if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST))
- return -IPSET_ERR_EXIST;
-
- members[id] = ip_set_timeout_set(timeout);
-
- return 0;
+ return !test_and_clear_bit(e->id, map->members);
}
-static int
-bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
+static inline int
+bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id)
{
- struct bitmap_port *map = set->data;
- unsigned long *members = map->members;
- u16 id = *(u16 *)value;
- int ret = -IPSET_ERR_EXIST;
-
- if (ip_set_timeout_test(members[id]))
- ret = 0;
-
- members[id] = IPSET_ELEM_UNSET;
- return ret;
+ return nla_put_net16(skb, IPSET_ATTR_PORT,
+ htons(map->first_port + id));
}
-static int
-bitmap_port_tlist(const struct ip_set *set,
- struct sk_buff *skb, struct netlink_callback *cb)
+static inline int
+bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
{
- const struct bitmap_port *map = set->data;
- struct nlattr *adt, *nested;
- u16 id, first = cb->args[2];
- u16 last = map->last_port - map->first_port;
- const unsigned long *members = map->members;
-
- adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
- if (!adt)
- return -EMSGSIZE;
- for (; cb->args[2] <= last; cb->args[2]++) {
- id = cb->args[2];
- if (!ip_set_timeout_test(members[id]))
- continue;
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested) {
- if (id == first) {
- nla_nest_cancel(skb, adt);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
- }
- if (nla_put_net16(skb, IPSET_ATTR_PORT,
- htons(map->first_port + id)) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(members[id]))))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
- }
- ipset_nest_end(skb, adt);
-
- /* Set listing finished */
- cb->args[2] = 0;
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, adt);
- if (unlikely(id == first)) {
- cb->args[2] = 0;
- return -EMSGSIZE;
- }
- return 0;
+ return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
}
static int
bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
+ struct bitmap_port_adt_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
__be16 __port;
u16 port = 0;
@@ -230,9 +119,9 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
if (port < map->first_port || port > map->last_port)
return -IPSET_ERR_BITMAP_RANGE;
- port -= map->first_port;
+ e.id = port_to_id(map, port);
- return adtfn(set, &port, opt_timeout(opt, map), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
@@ -241,14 +130,17 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- u32 timeout = map->timeout;
+ struct bitmap_port_adt_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
u32 port; /* wraparound */
- u16 id, port_to;
+ u16 port_to;
int ret = 0;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -257,16 +149,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
if (port < map->first_port || port > map->last_port)
return -IPSET_ERR_BITMAP_RANGE;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(map->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
if (adt == IPSET_TEST) {
- id = port - map->first_port;
- return adtfn(set, &id, timeout, flags);
+ e.id = port_to_id(map, port);
+ return adtfn(set, &e, &ext, &ext, flags);
}
if (tb[IPSET_ATTR_PORT_TO]) {
@@ -283,8 +172,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
for (; port <= port_to; port++) {
- id = port - map->first_port;
- ret = adtfn(set, &id, timeout, flags);
+ e.id = port_to_id(map, port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -294,52 +183,6 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static void
-bitmap_port_destroy(struct ip_set *set)
-{
- struct bitmap_port *map = set->data;
-
- if (with_timeout(map->timeout))
- del_timer_sync(&map->gc);
-
- ip_set_free(map->members);
- kfree(map);
-
- set->data = NULL;
-}
-
-static void
-bitmap_port_flush(struct ip_set *set)
-{
- struct bitmap_port *map = set->data;
-
- memset(map->members, 0, map->memsize);
-}
-
-static int
-bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
-{
- const struct bitmap_port *map = set->data;
- struct nlattr *nested;
-
- nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested)
- goto nla_put_failure;
- if (nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
- nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)) ||
- nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) + map->memsize)) ||
- (with_timeout(map->timeout) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))))
- goto nla_put_failure;
- ipset_nest_end(skb, nested);
-
- return 0;
-nla_put_failure:
- return -EMSGSIZE;
-}
-
static bool
bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
{
@@ -348,71 +191,35 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
return x->first_port == y->first_port &&
x->last_port == y->last_port &&
- x->timeout == y->timeout;
+ x->timeout == y->timeout &&
+ a->extensions == b->extensions;
}
-static const struct ip_set_type_variant bitmap_port = {
- .kadt = bitmap_port_kadt,
- .uadt = bitmap_port_uadt,
- .adt = {
- [IPSET_ADD] = bitmap_port_add,
- [IPSET_DEL] = bitmap_port_del,
- [IPSET_TEST] = bitmap_port_test,
- },
- .destroy = bitmap_port_destroy,
- .flush = bitmap_port_flush,
- .head = bitmap_port_head,
- .list = bitmap_port_list,
- .same_set = bitmap_port_same_set,
+/* Plain variant */
+
+struct bitmap_port_elem {
};
-static const struct ip_set_type_variant bitmap_tport = {
- .kadt = bitmap_port_kadt,
- .uadt = bitmap_port_uadt,
- .adt = {
- [IPSET_ADD] = bitmap_port_tadd,
- [IPSET_DEL] = bitmap_port_tdel,
- [IPSET_TEST] = bitmap_port_ttest,
- },
- .destroy = bitmap_port_destroy,
- .flush = bitmap_port_flush,
- .head = bitmap_port_head,
- .list = bitmap_port_tlist,
- .same_set = bitmap_port_same_set,
+/* Timeout variant */
+
+struct bitmap_portt_elem {
+ unsigned long timeout;
};
-static void
-bitmap_port_gc(unsigned long ul_set)
-{
- struct ip_set *set = (struct ip_set *) ul_set;
- struct bitmap_port *map = set->data;
- unsigned long *table = map->members;
- u32 id; /* wraparound */
- u16 last = map->last_port - map->first_port;
-
- /* We run parallel with other readers (test element)
- * but adding/deleting new entries is locked out */
- read_lock_bh(&set->lock);
- for (id = 0; id <= last; id++)
- if (ip_set_timeout_expired(table[id]))
- table[id] = IPSET_ELEM_UNSET;
- read_unlock_bh(&set->lock);
-
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
- add_timer(&map->gc);
-}
+/* Plain variant with counter */
-static void
-bitmap_port_gc_init(struct ip_set *set)
-{
- struct bitmap_port *map = set->data;
+struct bitmap_portc_elem {
+ struct ip_set_counter counter;
+};
- init_timer(&map->gc);
- map->gc.data = (unsigned long) set;
- map->gc.function = bitmap_port_gc;
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
- add_timer(&map->gc);
-}
+/* Timeout variant with counter */
+
+struct bitmap_portct_elem {
+ unsigned long timeout;
+ struct ip_set_counter counter;
+};
+
+#include "ip_set_bitmap_gen.h"
/* Create bitmap:ip type of sets */
@@ -423,6 +230,13 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
+ if (map->dsize) {
+ map->extensions = ip_set_alloc(map->dsize * map->elements);
+ if (!map->extensions) {
+ kfree(map->members);
+ return false;
+ }
+ }
map->first_port = first_port;
map->last_port = last_port;
map->timeout = IPSET_NO_TIMEOUT;
@@ -434,15 +248,16 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
}
static int
-bitmap_port_create(struct ip_set *set, struct nlattr *tb[],
- u32 flags)
+bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
{
struct bitmap_port *map;
u16 first_port, last_port;
+ u32 cadt_flags = 0;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
@@ -458,28 +273,56 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- map->memsize = (last_port - first_port + 1)
- * sizeof(unsigned long);
-
+ map->elements = last_port - first_port + 1;
+ map->memsize = map->elements * sizeof(unsigned long);
+ set->variant = &bitmap_port;
+ if (tb[IPSET_ATTR_CADT_FLAGS])
+ cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
+ set->extensions |= IPSET_EXT_COUNTER;
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ map->dsize = sizeof(struct bitmap_portct_elem);
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct bitmap_portct_elem, timeout);
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct bitmap_portct_elem, counter);
+ if (!init_map_port(set, map, first_port, last_port)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+
+ map->timeout =
+ ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ set->extensions |= IPSET_EXT_TIMEOUT;
+ bitmap_port_gc_init(set, bitmap_port_gc);
+ } else {
+ map->dsize = sizeof(struct bitmap_portc_elem);
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct bitmap_portc_elem, counter);
+ if (!init_map_port(set, map, first_port, last_port)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+ }
+ } else if (tb[IPSET_ATTR_TIMEOUT]) {
+ map->dsize = sizeof(struct bitmap_portt_elem);
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct bitmap_portt_elem, timeout);
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
return -ENOMEM;
}
map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->variant = &bitmap_tport;
-
- bitmap_port_gc_init(set);
+ set->extensions |= IPSET_EXT_TIMEOUT;
+ bitmap_port_gc_init(set, bitmap_port_gc);
} else {
- map->memsize = bitmap_bytes(0, last_port - first_port);
- pr_debug("memsize: %zu\n", map->memsize);
+ map->dsize = 0;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
return -ENOMEM;
}
- set->variant = &bitmap_port;
}
return 0;
}
@@ -497,12 +340,15 @@ static struct ip_set_type bitmap_port_type = {
[IPSET_ATTR_PORT] = { .type = NLA_U16 },
[IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_PORT] = { .type = NLA_U16 },
[IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 1ba9dbc0e107..f77139007983 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1,6 +1,6 @@
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
- * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -15,7 +15,6 @@
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
-#include <linux/netlink.h>
#include <linux/rculist.h>
#include <net/netlink.h>
@@ -316,6 +315,29 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
+int
+ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
+ struct ip_set_ext *ext)
+{
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ if (!(set->extensions & IPSET_EXT_TIMEOUT))
+ return -IPSET_ERR_TIMEOUT;
+ ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ }
+ if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
+ if (!(set->extensions & IPSET_EXT_COUNTER))
+ return -IPSET_ERR_COUNTER;
+ if (tb[IPSET_ATTR_BYTES])
+ ext->bytes = be64_to_cpu(nla_get_be64(
+ tb[IPSET_ATTR_BYTES]));
+ if (tb[IPSET_ATTR_PACKETS])
+ ext->packets = be64_to_cpu(nla_get_be64(
+ tb[IPSET_ATTR_PACKETS]));
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_extensions);
+
/*
* Creating/destroying/renaming/swapping affect the existence and
* the properties of a set. All of these can be executed from userspace
@@ -366,8 +388,7 @@ ip_set_rcu_get(ip_set_id_t index)
int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
- const struct xt_action_param *par,
- const struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
struct ip_set *set = ip_set_rcu_get(index);
int ret = 0;
@@ -392,7 +413,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
ret = 1;
} else {
/* --return-nomatch: invert matched element */
- if ((opt->flags & IPSET_RETURN_NOMATCH) &&
+ if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
(set->type->features & IPSET_TYPE_NOMATCH) &&
(ret > 0 || ret == -ENOTEMPTY))
ret = -ret;
@@ -405,8 +426,7 @@ EXPORT_SYMBOL_GPL(ip_set_test);
int
ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
- const struct xt_action_param *par,
- const struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
struct ip_set *set = ip_set_rcu_get(index);
int ret;
@@ -428,8 +448,7 @@ EXPORT_SYMBOL_GPL(ip_set_add);
int
ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
- const struct xt_action_param *par,
- const struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
struct ip_set *set = ip_set_rcu_get(index);
int ret = 0;
@@ -1085,7 +1104,7 @@ static int
dump_init(struct netlink_callback *cb)
{
struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
- int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
struct nlattr *attr = (void *)nlh + min_len;
u32 dump_type;
@@ -1301,7 +1320,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
struct sk_buff *skb2;
struct nlmsgerr *errmsg;
size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
- int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
struct nlattr *cmdattr;
u32 *errline;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
new file mode 100644
index 000000000000..57beb1762b2d
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -0,0 +1,1100 @@
+/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _IP_SET_HASH_GEN_H
+#define _IP_SET_HASH_GEN_H
+
+#include <linux/rcupdate.h>
+#include <linux/jhash.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#ifndef rcu_dereference_bh
+#define rcu_dereference_bh(p) rcu_dereference(p)
+#endif
+
+#define CONCAT(a, b) a##b
+#define TOKEN(a, b) CONCAT(a, b)
+
+/* Hashing which uses arrays to resolve clashing. The hash table is resized
+ * (doubled) when searching becomes too long.
+ * Internally jhash is used with the assumption that the size of the
+ * stored data is a multiple of sizeof(u32). If storage supports timeout,
+ * the timeout field must be the last one in the data structure - that field
+ * is ignored when computing the hash key.
+ *
+ * Readers and resizing
+ *
+ * Resizing can be triggered by userspace command only, and those
+ * are serialized by the nfnl mutex. During resizing the set is
+ * read-locked, so the only possible concurrent operations are
+ * the kernel side readers. Those must be protected by proper RCU locking.
+ */
+
+/* Number of elements to store in an initial array block */
+#define AHASH_INIT_SIZE 4
+/* Max number of elements to store in an array block */
+#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE)
+
+/* Max number of elements can be tuned */
+#ifdef IP_SET_HASH_WITH_MULTI
+#define AHASH_MAX(h) ((h)->ahash_max)
+
+/* Return the new per-bucket element limit: curr grown by AHASH_INIT_SIZE
+ * when multi has reached curr, otherwise (or when growing is not allowed)
+ * curr unchanged.
+ */
+static inline u8
+tune_ahash_max(u8 curr, u32 multi)
+{
+	u32 grown = curr + AHASH_INIT_SIZE;
+
+	/* Nothing to tune while multi stays below the current limit */
+	if (multi < curr)
+		return curr;
+
+	/* Currently, at listing one hash bucket must fit into a message.
+	 * Therefore we have a hard limit here.
+	 */
+	if (grown <= curr || grown > 64)
+		return curr;
+
+	return grown;
+}
+#define TUNE_AHASH_MAX(h, multi) \
+ ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
+#else
+#define AHASH_MAX(h) AHASH_MAX_SIZE
+#define TUNE_AHASH_MAX(h, multi)
+#endif
+
+/* A hash bucket: a growable array of elements. Slots [0, pos) hold the
+ * stored elements and size is the number of allocated slots (counted in
+ * elements, not bytes). The code in this file only touches value while
+ * size is non-zero.
+ */
+struct hbucket {
+ void *value; /* the array of the values */
+ u8 size; /* size of the array */
+ u8 pos; /* position of the first free entry */
+};
+
+/* The hash table: the table size stored here in order to make resizing easy.
+ * The bucket array trails the header in the same allocation; htable_size()
+ * computes the total byte count for a given htable_bits.
+ */
+struct htable {
+ u8 htable_bits; /* size of hash table == 2^htable_bits */
+ struct hbucket bucket[0]; /* hashtable buckets */
+};
+
+#define hbucket(h, i) (&((h)->bucket[i]))
+
+/* Book-keeping of the prefixes added to the set */
+struct net_prefixes {
+ u8 cidr; /* the different cidr values in the set */
+ u32 nets; /* number of elements per cidr */
+};
+
+/* Compute the number of bytes needed for a hash table of 2^hbits buckets.
+ * Returns 0 when hbits is above 31 or when the byte count would not fit
+ * into size_t.
+ */
+static size_t
+htable_size(u8 hbits)
+{
+	const size_t max_buckets =
+		(((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket);
+	size_t nbuckets;
+
+	/* We must fit both into u32 in jhash and size_t */
+	if (hbits > 31)
+		return 0;
+
+	nbuckets = jhash_size(hbits);
+	if (max_buckets < nbuckets)
+		return 0;
+
+	return sizeof(struct htable) + nbuckets * sizeof(struct hbucket);
+}
+
+/* Compute htable_bits from the user input parameter hashsize: the exponent
+ * of hashsize when it is a power of two, otherwise the exponent of the next
+ * power of two above it.
+ */
+static u8
+htable_bits(u32 hashsize)
+{
+	/* Assume that hashsize == 2^htable_bits */
+	u8 exact = fls(hashsize - 1);
+
+	if (jhash_size(exact) == hashsize)
+		return exact;
+
+	/* Round up to the first 2^n value */
+	return fls(hashsize);
+}
+
+/* Destroy the hashtable part of the set: release the element array of every
+ * bucket that has one, then the table itself.
+ */
+static void
+ahash_destroy(struct htable *t)
+{
+	u32 idx;
+
+	for (idx = 0; idx < jhash_size(t->htable_bits); idx++) {
+		struct hbucket *bucket = hbucket(t, idx);
+
+		if (!bucket->size)
+			continue;
+		/* FIXME: use slab cache */
+		kfree(bucket->value);
+	}
+
+	ip_set_free(t);
+}
+
+/* Make sure bucket n has a free slot at index n->pos, growing the element
+ * array by AHASH_INIT_SIZE slots of dsize bytes when it is full.
+ * Returns 0 on success, -EAGAIN when the bucket already holds ahash_max
+ * slots (the caller should rehash) and -ENOMEM when the allocation fails.
+ */
+static int
+hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
+{
+	void *grown;
+
+	/* There is still a free slot */
+	if (n->pos < n->size)
+		return 0;
+
+	/* Trigger rehashing */
+	if (n->size >= ahash_max)
+		return -EAGAIN;
+
+	grown = kzalloc((n->size + AHASH_INIT_SIZE) * dsize, GFP_ATOMIC);
+	if (!grown)
+		return -ENOMEM;
+
+	/* Carry over the existing elements, if any */
+	if (n->size) {
+		memcpy(grown, n->value, n->size * dsize);
+		kfree(n->value);
+	}
+	n->value = grown;
+	n->size += AHASH_INIT_SIZE;
+
+	return 0;
+}
+
+#ifdef IP_SET_HASH_WITH_NETS
+#ifdef IP_SET_HASH_WITH_NETS_PACKED
+/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
+#define CIDR(cidr) (cidr + 1)
+#else
+#define CIDR(cidr) (cidr)
+#endif
+
+#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
+
+#ifdef IP_SET_HASH_WITH_MULTI
+#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1)
+#else
+#define NETS_LENGTH(family) SET_HOST_MASK(family)
+#endif
+
+#else
+#define NETS_LENGTH(family) 0
+#endif /* IP_SET_HASH_WITH_NETS */
+
+#define ext_timeout(e, h) \
+(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT])
+#define ext_counter(e, h) \
+(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER])
+
+#endif /* _IP_SET_HASH_GEN_H */
+
+/* Family dependent templates */
+
+#undef ahash_data
+#undef mtype_data_equal
+#undef mtype_do_data_match
+#undef mtype_data_set_flags
+#undef mtype_data_reset_flags
+#undef mtype_data_netmask
+#undef mtype_data_list
+#undef mtype_data_next
+#undef mtype_elem
+
+#undef mtype_add_cidr
+#undef mtype_del_cidr
+#undef mtype_ahash_memsize
+#undef mtype_flush
+#undef mtype_destroy
+#undef mtype_gc_init
+#undef mtype_same_set
+#undef mtype_kadt
+#undef mtype_uadt
+#undef mtype
+
+#undef mtype_add
+#undef mtype_del
+#undef mtype_test_cidrs
+#undef mtype_test
+#undef mtype_expire
+#undef mtype_resize
+#undef mtype_head
+#undef mtype_list
+#undef mtype_gc
+#undef mtype_gc_init
+#undef mtype_variant
+#undef mtype_data_match
+
+#undef HKEY
+
+#define mtype_data_equal TOKEN(MTYPE, _data_equal)
+#ifdef IP_SET_HASH_WITH_NETS
+#define mtype_do_data_match TOKEN(MTYPE, _do_data_match)
+#else
+#define mtype_do_data_match(d) 1
+#endif
+#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags)
+#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags)
+#define mtype_data_netmask TOKEN(MTYPE, _data_netmask)
+#define mtype_data_list TOKEN(MTYPE, _data_list)
+#define mtype_data_next TOKEN(MTYPE, _data_next)
+#define mtype_elem TOKEN(MTYPE, _elem)
+#define mtype_add_cidr TOKEN(MTYPE, _add_cidr)
+#define mtype_del_cidr TOKEN(MTYPE, _del_cidr)
+#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize)
+#define mtype_flush TOKEN(MTYPE, _flush)
+#define mtype_destroy TOKEN(MTYPE, _destroy)
+#define mtype_gc_init TOKEN(MTYPE, _gc_init)
+#define mtype_same_set TOKEN(MTYPE, _same_set)
+#define mtype_kadt TOKEN(MTYPE, _kadt)
+#define mtype_uadt TOKEN(MTYPE, _uadt)
+#define mtype MTYPE
+
+#define mtype_elem TOKEN(MTYPE, _elem)
+#define mtype_add TOKEN(MTYPE, _add)
+#define mtype_del TOKEN(MTYPE, _del)
+#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs)
+#define mtype_test TOKEN(MTYPE, _test)
+#define mtype_expire TOKEN(MTYPE, _expire)
+#define mtype_resize TOKEN(MTYPE, _resize)
+#define mtype_head TOKEN(MTYPE, _head)
+#define mtype_list TOKEN(MTYPE, _list)
+#define mtype_gc TOKEN(MTYPE, _gc)
+#define mtype_variant TOKEN(MTYPE, _variant)
+#define mtype_data_match TOKEN(MTYPE, _data_match)
+
+#ifndef HKEY_DATALEN
+#define HKEY_DATALEN sizeof(struct mtype_elem)
+#endif
+
+#define HKEY(data, initval, htable_bits) \
+(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \
+ & jhash_mask(htable_bits))
+
+#ifndef htype
+#define htype HTYPE
+
+/* The generic hash structure.
+ * With IP_SET_HASH_WITH_NETS the nets[] array trails the structure and is
+ * sized when the set is created, so it has to stay the last member.
+ */
+struct htype {
+ struct htable *table; /* the hash table */
+ u32 maxelem; /* max elements in the hash */
+ u32 elements; /* current element (vs timeout) */
+ u32 initval; /* random jhash init value */
+ u32 timeout; /* timeout value, if enabled */
+ size_t dsize; /* data struct size */
+ size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+ struct timer_list gc; /* garbage collection when timeout enabled */
+ struct mtype_elem next; /* temporary storage for uadd */
+#ifdef IP_SET_HASH_WITH_MULTI
+ u8 ahash_max; /* max elements in an array block */
+#endif
+#ifdef IP_SET_HASH_WITH_NETMASK
+ u8 netmask; /* netmask value for subnets to store */
+#endif
+#ifdef IP_SET_HASH_WITH_RBTREE
+ struct rb_root rbtree;
+#endif
+#ifdef IP_SET_HASH_WITH_NETS
+ struct net_prefixes nets[0]; /* book-keeping of prefixes */
+#endif
+};
+#endif
+
+#ifdef IP_SET_HASH_WITH_NETS
+/* Network cidr size book keeping when the hash stores different
+ * sized networks.
+ *
+ * Account one more element with the given cidr in h->nets. Entries with a
+ * non-zero nets counter are in use and are kept ordered by decreasing cidr
+ * value. If cidr is already listed its counter is incremented, otherwise a
+ * new entry with counter 1 is inserted at its sorted position and the
+ * entries with a smaller cidr are moved one slot towards the end. */
+static void
+mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
+{
+ int i, j;
+
+ /* Entries are ordered with the larger cidr first.
+  * j becomes the index of the first entry with a smaller cidr (the
+  * insertion point, -1 while none was seen); i stops at the first
+  * unused entry or at nets_length. */
+ for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
+ if (j != -1)
+ continue; /* insertion point known: just find the end */
+ else if (h->nets[i].cidr < cidr)
+ j = i;
+ else if (h->nets[i].cidr == cidr) {
+ h->nets[i].nets++;
+ return;
+ }
+ }
+ if (j != -1) {
+ for (; i > j; i--) { /* open a gap at j; the loop ends with i == j */
+ h->nets[i].cidr = h->nets[i - 1].cidr;
+ h->nets[i].nets = h->nets[i - 1].nets;
+ }
+ }
+ h->nets[i].cidr = cidr; /* i is the insertion point or the first unused entry */
+ h->nets[i].nets = 1;
+}
+
+/* Account the removal of one element with the given cidr in h->nets.
+ *
+ * The used entries (non-zero nets counter) are contiguous from index 0 and
+ * every lookup in this file stops at the first entry whose counter is zero.
+ * Therefore, when the last element of a cidr goes away, the following used
+ * entries are moved one slot down so that no zero counter is left in front
+ * of them. A cidr which is not tracked is ignored instead of corrupting
+ * the counter of an unrelated entry.
+ */
+static void
+mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
+{
+	u8 i, j, net_end = nets_length - 1;
+
+	/* Look for the cidr among the used entries only */
+	for (i = 0; i < nets_length && h->nets[i].nets; i++) {
+		if (h->nets[i].cidr == cidr)
+			break;
+	}
+	if (i == nets_length || !h->nets[i].nets)
+		return;
+
+	/* Other elements with the same cidr are still stored */
+	if (--h->nets[i].nets)
+		return;
+
+	/* Close the gap: shift the used entries behind i one slot down */
+	for (j = i; j < net_end && h->nets[j + 1].nets; j++) {
+		h->nets[j].cidr = h->nets[j + 1].cidr;
+		h->nets[j].nets = h->nets[j + 1].nets;
+	}
+	/* j is now the last formerly used slot: mark it unused */
+	h->nets[j].nets = 0;
+}
+#endif
+
+/* Calculate the actual memory size of the set data: the htype structure,
+ * the prefix book-keeping array (when compiled in), the table header with
+ * its buckets and the element array of every bucket.
+ */
+static size_t
+mtype_ahash_memsize(const struct htype *h, u8 nets_length)
+{
+	const struct htable *table = h->table;
+	size_t total = sizeof(*h) + sizeof(*table);
+	u32 idx;
+
+#ifdef IP_SET_HASH_WITH_NETS
+	total += sizeof(struct net_prefixes) * nets_length;
+#endif
+	total += jhash_size(table->htable_bits) * sizeof(struct hbucket);
+
+	for (idx = 0; idx < jhash_size(table->htable_bits); idx++)
+		total += table->bucket[idx].size * h->dsize;
+
+	return total;
+}
+
+/* Flush a hash type of set: drop the element array of every bucket, clear
+ * the prefix book-keeping (when compiled in) and reset the element count.
+ * The hash table itself is kept.
+ */
+static void
+mtype_flush(struct ip_set *set)
+{
+	struct htype *h = set->data;
+	struct htable *table = h->table;
+	u32 idx;
+
+	for (idx = 0; idx < jhash_size(table->htable_bits); idx++) {
+		struct hbucket *bucket = hbucket(table, idx);
+
+		if (!bucket->size)
+			continue;
+		bucket->pos = 0;
+		bucket->size = 0;
+		/* FIXME: use slab cache */
+		kfree(bucket->value);
+	}
+#ifdef IP_SET_HASH_WITH_NETS
+	memset(h->nets, 0,
+	       sizeof(struct net_prefixes) * NETS_LENGTH(set->family));
+#endif
+	h->elements = 0;
+}
+
+/* Destroy a hash type of set: stop the garbage collector timer when the
+ * set has the timeout extension, free the hash table (and the rbtree when
+ * compiled in) and the set data, then clear set->data.
+ */
+static void
+mtype_destroy(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (set->extensions & IPSET_EXT_TIMEOUT)
+ del_timer_sync(&h->gc); /* the create function arms the timer only for timeout sets */
+
+ ahash_destroy(h->table);
+#ifdef IP_SET_HASH_WITH_RBTREE
+ rbtree_destroy(&h->rbtree);
+#endif
+ kfree(h);
+
+ set->data = NULL;
+}
+
+/* Set up the garbage collector timer of the set and arm it: gc is called
+ * with the set (cast to unsigned long) after IPSET_GC_PERIOD(h->timeout)
+ * seconds.
+ */
+static void
+mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+{
+	struct htype *h = set->data;
+	struct timer_list *timer = &h->gc;
+
+	init_timer(timer);
+	timer->function = gc;
+	timer->data = (unsigned long) set;
+	timer->expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	add_timer(timer);
+	pr_debug("gc initialized, run in every %u\n",
+		 IPSET_GC_PERIOD(h->timeout));
+}
+
+/* Tell whether two sets were created with the same parameters: maxelem,
+ * timeout, netmask (when compiled in) and extensions.
+ */
+static bool
+mtype_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct htype *ha = a->data;
+	const struct htype *hb = b->data;
+
+	/* Resizing changes htable_bits, so we ignore it */
+	if (ha->maxelem != hb->maxelem)
+		return false;
+	if (ha->timeout != hb->timeout)
+		return false;
+#ifdef IP_SET_HASH_WITH_NETMASK
+	if (ha->netmask != hb->netmask)
+		return false;
+#endif
+	return a->extensions == b->extensions;
+}
+
+/* Get the ith element from the array block n */
+#define ahash_data(n, i, dsize) \
+ ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
+
+/* Delete expired elements from the hashtable.
+ *
+ * Every bucket is scanned once: an expired element is overwritten with the
+ * last element of its bucket and the bucket and set element counts are
+ * decremented. A bucket left with more than AHASH_INIT_SIZE free slots is
+ * shrunk by AHASH_INIT_SIZE slots.
+ * mtype_gc() and mtype_resize() call this with set->lock write-locked.
+ */
+static void
+mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
+{
+ struct htable *t = h->table;
+ struct hbucket *n;
+ struct mtype_elem *data;
+ u32 i;
+ int j;
+
+ for (i = 0; i < jhash_size(t->htable_bits); i++) {
+ n = hbucket(t, i);
+ for (j = 0; j < n->pos; j++) {
+ data = ahash_data(n, j, dsize);
+ if (ip_set_timeout_expired(ext_timeout(data, h))) {
+ pr_debug("expired %u/%u\n", i, j);
+#ifdef IP_SET_HASH_WITH_NETS
+ mtype_del_cidr(h, CIDR(data->cidr),
+ nets_length);
+#endif
+ if (j != n->pos - 1)
+ /* Not last one: fill the hole with the last
+  * element. The scan continues at j + 1, so the
+  * element moved here is not examined again in
+  * this pass. */
+ memcpy(data,
+ ahash_data(n, n->pos - 1, dsize),
+ dsize);
+ n->pos--;
+ h->elements--;
+ }
+ }
+ if (n->pos + AHASH_INIT_SIZE < n->size) { /* more than one block of slots is free */
+ void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
+ * dsize,
+ GFP_ATOMIC);
+ if (!tmp)
+ /* Still try to delete expired elements:
+  * the bucket simply keeps its larger array */
+ continue;
+ n->size -= AHASH_INIT_SIZE;
+ memcpy(tmp, n->value, n->size * dsize);
+ kfree(n->value);
+ n->value = tmp;
+ }
+ }
+}
+
+/* Garbage collector timer callback: delete the expired elements with the
+ * set write-locked, then re-arm the timer for the next period.
+ */
+static void
+mtype_gc(unsigned long ul_set)
+{
+	struct ip_set *set = (struct ip_set *) ul_set;
+	struct htype *h = set->data;
+	struct timer_list *timer = &h->gc;
+
+	pr_debug("called\n");
+
+	write_lock_bh(&set->lock);
+	mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+	write_unlock_bh(&set->lock);
+
+	timer->expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	add_timer(timer);
+}
+
+/* Resize a hash: create a new hash table with doubling the hashsize
+ * and inserting the elements to it. Repeat until we succeed or
+ * fail due to memory pressures.
+ *
+ * Returns 0 when the table was replaced, or when this is the first attempt
+ * (!retried) on a timeout set and expiring elements already freed room;
+ * -ENOMEM when an allocation fails and -IPSET_ERR_HASH_FULL when
+ * htable_bits cannot be increased any more.
+ *
+ * NOTE(review): the only upper limit checked here is the u8 wrap of
+ * htable_bits to 0, while htable_size() refuses more than 31 bits -
+ * confirm that larger values cannot be reached on this path. */
+static int
+mtype_resize(struct ip_set *set, bool retried)
+{
+ struct htype *h = set->data;
+ struct htable *t, *orig = h->table;
+ u8 htable_bits = orig->htable_bits;
+#ifdef IP_SET_HASH_WITH_NETS
+ u8 flags;
+#endif
+ struct mtype_elem *data;
+ struct mtype_elem *d;
+ struct hbucket *n, *m;
+ u32 i, j;
+ int ret;
+
+ /* Try to cleanup once: if expiring elements made room, no resize
+  * is needed */
+ if (SET_WITH_TIMEOUT(set) && !retried) {
+ i = h->elements;
+ write_lock_bh(&set->lock);
+ mtype_expire(set->data, NETS_LENGTH(set->family),
+ h->dsize);
+ write_unlock_bh(&set->lock);
+ if (h->elements < i)
+ return 0;
+ }
+
+retry:
+ ret = 0;
+ htable_bits++; /* each attempt doubles the table once more */
+ pr_debug("attempt to resize set %s from %u to %u, t %p\n",
+ set->name, orig->htable_bits, htable_bits, orig);
+ if (!htable_bits) {
+ /* In case we have plenty of memory :-) */
+ pr_warning("Cannot increase the hashsize of set %s further\n",
+ set->name);
+ return -IPSET_ERR_HASH_FULL;
+ }
+ t = ip_set_alloc(sizeof(*t)
+ + jhash_size(htable_bits) * sizeof(struct hbucket));
+ if (!t)
+ return -ENOMEM;
+ t->htable_bits = htable_bits;
+
+ read_lock_bh(&set->lock);
+ for (i = 0; i < jhash_size(orig->htable_bits); i++) {
+ n = hbucket(orig, i);
+ for (j = 0; j < n->pos; j++) {
+ data = ahash_data(n, j, h->dsize);
+#ifdef IP_SET_HASH_WITH_NETS
+ flags = 0;
+ mtype_data_reset_flags(data, &flags); /* presumably moves the flag bits out of the element into flags before hashing - TODO confirm in the set type */
+#endif
+ m = hbucket(t, HKEY(data, h->initval, htable_bits));
+ ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize);
+ if (ret < 0) {
+#ifdef IP_SET_HASH_WITH_NETS
+ mtype_data_reset_flags(data, &flags);
+#endif
+ read_unlock_bh(&set->lock);
+ ahash_destroy(t);
+ if (ret == -EAGAIN) /* a bucket of the new table is full as well: double again */
+ goto retry;
+ return ret;
+ }
+ d = ahash_data(m, m->pos++, h->dsize);
+ memcpy(d, data, h->dsize);
+#ifdef IP_SET_HASH_WITH_NETS
+ mtype_data_reset_flags(d, &flags);
+#endif
+ }
+ }
+
+ rcu_assign_pointer(h->table, t);
+ read_unlock_bh(&set->lock);
+
+ /* Give time to other readers of the set: the old table is only
+  * destroyed after the RCU grace period */
+ synchronize_rcu_bh();
+
+ pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
+ orig->htable_bits, orig, t->htable_bits, t);
+ ahash_destroy(orig);
+
+ return 0;
+}
+
+/* Add an element to a hash and update the internal counters when succeeded,
+ * otherwise report the proper error code.
+ *
+ * value points to the struct mtype_elem to store and ext carries the
+ * timeout/counter values for the extensions enabled on the set.
+ * Returns 0 on success, -IPSET_ERR_HASH_FULL when maxelem is reached,
+ * -IPSET_ERR_EXIST when the element is already stored (not expired and
+ * IPSET_FLAG_EXIST is not set in flags), -EAGAIN when the bucket is full
+ * and the table must be resized, -ENOMEM when growing the bucket fails.
+ *
+ * The slot of an identical element (with IPSET_FLAG_EXIST, or when that
+ * element has timed out) or else the first timed out slot of the bucket is
+ * reused; only when there is none is a new slot taken.
+ */
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+	  struct ip_set_ext *mext, u32 flags)
+{
+	struct htype *h = set->data;
+	struct htable *t;
+	const struct mtype_elem *d = value;
+	struct mtype_elem *data;
+	struct hbucket *n;
+	int i, ret = 0;
+	/* AHASH_MAX(h) + 1 is never a valid slot index: "nothing to reuse" */
+	int j = AHASH_MAX(h) + 1;
+	bool flag_exist = flags & IPSET_FLAG_EXIST;
+	u32 key, multi = 0;
+
+	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
+		/* FIXME: when set is full, we slow down here */
+		mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+
+	if (h->elements >= h->maxelem) {
+		if (net_ratelimit())
+			pr_warning("Set %s is full, maxelem %u reached\n",
+				   set->name, h->maxelem);
+		return -IPSET_ERR_HASH_FULL;
+	}
+
+	rcu_read_lock_bh();
+	t = rcu_dereference_bh(h->table);
+	key = HKEY(value, h->initval, t->htable_bits);
+	n = hbucket(t, key);
+	for (i = 0; i < n->pos; i++) {
+		data = ahash_data(n, i, h->dsize);
+		if (mtype_data_equal(data, d, &multi)) {
+			if (flag_exist ||
+			    (SET_WITH_TIMEOUT(set) &&
+			     ip_set_timeout_expired(ext_timeout(data, h)))) {
+				/* Just the extensions could be overwritten */
+				j = i;
+				goto reuse_slot;
+			} else {
+				ret = -IPSET_ERR_EXIST;
+				goto out;
+			}
+		}
+		/* Reuse first timed out entry: remember it only while no
+		 * candidate has been recorded yet.
+		 */
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(data, h)) &&
+		    j == AHASH_MAX(h) + 1)
+			j = i;
+	}
+reuse_slot:
+	if (j != AHASH_MAX(h) + 1) {
+		/* Fill out reused slot: the element count does not change,
+		 * only the cidr book-keeping of the old and new element.
+		 */
+		data = ahash_data(n, j, h->dsize);
+#ifdef IP_SET_HASH_WITH_NETS
+		mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
+		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+#endif
+	} else {
+		/* Use/create a new slot */
+		TUNE_AHASH_MAX(h, multi);
+		ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize);
+		if (ret != 0) {
+			if (ret == -EAGAIN)
+				/* Save the element for the retry after
+				 * the resize
+				 */
+				mtype_data_next(&h->next, d);
+			goto out;
+		}
+		data = ahash_data(n, n->pos++, h->dsize);
+#ifdef IP_SET_HASH_WITH_NETS
+		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+#endif
+		h->elements++;
+	}
+	memcpy(data, d, sizeof(struct mtype_elem));
+#ifdef IP_SET_HASH_WITH_NETS
+	mtype_data_set_flags(data, flags);
+#endif
+	if (SET_WITH_TIMEOUT(set))
+		ip_set_timeout_set(ext_timeout(data, h), ext->timeout);
+	if (SET_WITH_COUNTER(set))
+		ip_set_init_counter(ext_counter(data, h), ext);
+
+out:
+	rcu_read_unlock_bh();
+	return ret;
+}
+
+/* Delete an element from the hash: swap it with the last element
+ * and free up space if possible.
+ *
+ * Returns 0 when the element was deleted and -IPSET_ERR_EXIST when it is
+ * not stored in the set or is stored but already timed out. The ext, mext
+ * and flags arguments are not used here.
+ */
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct htype *h = set->data;
+ struct htable *t = h->table;
+ const struct mtype_elem *d = value;
+ struct mtype_elem *data;
+ struct hbucket *n;
+ int i;
+ u32 key, multi = 0;
+
+ key = HKEY(value, h->initval, t->htable_bits);
+ n = hbucket(t, key);
+ for (i = 0; i < n->pos; i++) {
+ data = ahash_data(n, i, h->dsize);
+ if (!mtype_data_equal(data, d, &multi))
+ continue;
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(data, h)))
+ return -IPSET_ERR_EXIST; /* a timed out element counts as not stored */
+ if (i != n->pos - 1)
+ /* Not last one: overwrite it with the last element */
+ memcpy(data, ahash_data(n, n->pos - 1, h->dsize),
+ h->dsize);
+
+ n->pos--;
+ h->elements--;
+#ifdef IP_SET_HASH_WITH_NETS
+ mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+#endif
+ if (n->pos + AHASH_INIT_SIZE < n->size) { /* more than one block of slots is free: shrink */
+ void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
+ * h->dsize,
+ GFP_ATOMIC);
+ if (!tmp)
+ return 0; /* the element is deleted; the bucket just keeps its larger array */
+ n->size -= AHASH_INIT_SIZE;
+ memcpy(tmp, n->value, n->size * h->dsize);
+ kfree(n->value);
+ n->value = tmp;
+ }
+ return 0;
+ }
+
+ return -IPSET_ERR_EXIST;
+}
+
+/* Report a matching element: update its counter when the set has the
+ * counter extension and return the result of mtype_do_data_match().
+ */
+static inline int
+mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
+		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
+{
+	if (SET_WITH_COUNTER(set)) {
+		struct htype *h = set->data;
+
+		ip_set_update_counter(ext_counter(data, h), ext, mext, flags);
+	}
+
+	return mtype_do_data_match(data);
+}
+
+#ifdef IP_SET_HASH_WITH_NETS
+/* Special test function which takes into account the different network
+ * sizes added to the set.
+ *
+ * For every cidr recorded in h->nets, in the stored order and up to the
+ * first unused entry, d is masked in place with mtype_data_netmask() and
+ * looked up. The first matching, not timed out element decides the result
+ * through mtype_data_match(); 0 is returned when nothing matches. The walk
+ * also ends as soon as mtype_data_equal() leaves multi non-zero. */
+static int
+mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
+ const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ struct htype *h = set->data;
+ struct htable *t = h->table;
+ struct hbucket *n;
+ struct mtype_elem *data;
+ int i, j = 0;
+ u32 key, multi = 0;
+ u8 nets_length = NETS_LENGTH(set->family);
+
+ pr_debug("test by nets\n");
+ for (; j < nets_length && h->nets[j].nets && !multi; j++) {
+ mtype_data_netmask(d, h->nets[j].cidr); /* modifies the caller's element */
+ key = HKEY(d, h->initval, t->htable_bits);
+ n = hbucket(t, key);
+ for (i = 0; i < n->pos; i++) {
+ data = ahash_data(n, i, h->dsize);
+ if (!mtype_data_equal(data, d, &multi))
+ continue;
+ if (SET_WITH_TIMEOUT(set)) {
+ if (!ip_set_timeout_expired(
+ ext_timeout(data, h)))
+ return mtype_data_match(data, ext,
+ mext, set,
+ flags);
+#ifdef IP_SET_HASH_WITH_MULTI
+ multi = 0; /* the equal element has timed out: keep trying the other cidrs */
+#endif
+ } else
+ return mtype_data_match(data, ext,
+ mext, set, flags);
+ }
+ }
+ return 0;
+}
+#endif
+
+/* Test whether the element is added to the set. Returns the result of
+ * mtype_data_match() for the first equal element which has not timed out,
+ * 0 when there is none.
+ */
+static int
+mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+	   struct ip_set_ext *mext, u32 flags)
+{
+	struct htype *h = set->data;
+	struct htable *t = h->table;
+	struct mtype_elem *d = value;
+	struct mtype_elem *data;
+	struct hbucket *n;
+	u32 key, multi = 0;
+	int i;
+
+#ifdef IP_SET_HASH_WITH_NETS
+	/* If we test an IP address and not a network address,
+	 * try all possible network sizes */
+	if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
+		return mtype_test_cidrs(set, d, ext, mext, flags);
+#endif
+
+	key = HKEY(d, h->initval, t->htable_bits);
+	n = hbucket(t, key);
+	for (i = 0; i < n->pos; i++) {
+		data = ahash_data(n, i, h->dsize);
+		if (!mtype_data_equal(data, d, &multi))
+			continue;
+		/* Timed out elements do not count */
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(data, h)))
+			continue;
+		return mtype_data_match(data, ext, mext, set, flags);
+	}
+
+	return 0;
+}
+
+/* Reply a HEADER request: fill out the header part of the set.
+ *
+ * Puts a nested IPSET_ATTR_DATA attribute into skb with the hash size,
+ * maxelem, the netmask (when compiled in and different from HOST_MASK),
+ * the reference count minus one, the memory size and, for the enabled
+ * extensions, the timeout and the counters flag.
+ * Returns 0 on success and -EMSGSIZE when an attribute does not fit. */
+static int
+mtype_head(struct ip_set *set, struct sk_buff *skb)
+{
+ const struct htype *h = set->data;
+ struct nlattr *nested;
+ size_t memsize;
+
+ read_lock_bh(&set->lock); /* the bucket sizes are read under the set lock */
+ memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family));
+ read_unlock_bh(&set->lock);
+
+ nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested)
+ goto nla_put_failure;
+ if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
+ htonl(jhash_size(h->table->htable_bits))) ||
+ nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
+ goto nla_put_failure;
+#ifdef IP_SET_HASH_WITH_NETMASK
+ if (h->netmask != HOST_MASK &&
+ nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
+ goto nla_put_failure;
+#endif
+ if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+ ((set->extensions & IPSET_EXT_TIMEOUT) &&
+ nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) ||
+ ((set->extensions & IPSET_EXT_COUNTER) &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
+ htonl(IPSET_FLAG_WITH_COUNTERS))))
+ goto nla_put_failure;
+ ipset_nest_end(skb, nested);
+
+ return 0;
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+/* Reply a LIST/SAVE request: dump the elements of the specified set.
+ *
+ * cb->args[2] is the index of the bucket to continue with; it is advanced
+ * while buckets are dumped and reset to 0 when the listing is finished.
+ * Timed out elements are skipped. When the message fills up in the middle
+ * of a bucket, the message is trimmed back to the start of that bucket and
+ * 0 is returned with cb->args[2] still pointing at it. -EMSGSIZE is
+ * returned when the ADT attribute cannot be started or when even the first
+ * bucket handled by this call does not fit into the message. */
+static int
+mtype_list(const struct ip_set *set,
+ struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct htype *h = set->data;
+ const struct htable *t = h->table;
+ struct nlattr *atd, *nested;
+ const struct hbucket *n;
+ const struct mtype_elem *e;
+ u32 first = cb->args[2]; /* the bucket this call starts with */
+ /* We assume that one hash bucket fits into one page */
+ void *incomplete;
+ int i;
+
+ atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+ if (!atd)
+ return -EMSGSIZE;
+ pr_debug("list hash set %s\n", set->name);
+ for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
+ incomplete = skb_tail_pointer(skb); /* where to trim back to if this bucket does not fit */
+ n = hbucket(t, cb->args[2]);
+ pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
+ for (i = 0; i < n->pos; i++) {
+ e = ahash_data(n, i, h->dsize);
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, h)))
+ continue;
+ pr_debug("list hash %lu hbucket %p i %u, data %p\n",
+ cb->args[2], n, i, e);
+ nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+ if (!nested) {
+ if (cb->args[2] == first) {
+ nla_nest_cancel(skb, atd);
+ return -EMSGSIZE;
+ } else
+ goto nla_put_failure;
+ }
+ if (mtype_data_list(skb, e))
+ goto nla_put_failure;
+ if (SET_WITH_TIMEOUT(set) &&
+ nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(ip_set_timeout_get(
+ ext_timeout(e, h)))))
+ goto nla_put_failure;
+ if (SET_WITH_COUNTER(set) &&
+ ip_set_put_counter(skb, ext_counter(e, h)))
+ goto nla_put_failure;
+ ipset_nest_end(skb, nested);
+ }
+ }
+ ipset_nest_end(skb, atd);
+ /* Set listing finished */
+ cb->args[2] = 0;
+
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, incomplete); /* drop the partially dumped bucket */
+ ipset_nest_end(skb, atd);
+ if (unlikely(first == cb->args[2])) {
+ pr_warning("Can't list set %s: one bucket does not fit into "
+ "a message. Please report it!\n", set->name);
+ cb->args[2] = 0;
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
+static int
+TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt);
+
+static int
+TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+
+static const struct ip_set_type_variant mtype_variant = {
+ .kadt = mtype_kadt,
+ .uadt = mtype_uadt,
+ .adt = {
+ [IPSET_ADD] = mtype_add,
+ [IPSET_DEL] = mtype_del,
+ [IPSET_TEST] = mtype_test,
+ },
+ .destroy = mtype_destroy,
+ .flush = mtype_flush,
+ .head = mtype_head,
+ .list = mtype_list,
+ .resize = mtype_resize,
+ .same_set = mtype_same_set,
+};
+
+#ifdef IP_SET_EMIT_CREATE
+/* Create a hash type of set from the netlink attributes in tb.
+ *
+ * Allocates the htype structure (followed by the prefix book-keeping array
+ * when IP_SET_HASH_WITH_NETS is defined) and the hash table, selects the
+ * IPv4 or IPv6 variant and sets the element size and extension offsets
+ * according to the requested extensions (counters and/or timeout). The
+ * garbage collector timer is started for sets with timeout.
+ * Returns 0 on success, -IPSET_ERR_INVALID_FAMILY, -IPSET_ERR_PROTOCOL or
+ * -IPSET_ERR_INVALID_NETMASK for bad input and -ENOMEM when the hash size
+ * is too large or an allocation fails.
+ */
+static int
+TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+	u32 cadt_flags = 0;
+	u8 hbits;
+#ifdef IP_SET_HASH_WITH_NETMASK
+	u8 netmask;
+#endif
+	size_t hsize;
+	struct HTYPE *h;
+
+	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
+		return -IPSET_ERR_INVALID_FAMILY;
+#ifdef IP_SET_HASH_WITH_NETMASK
+	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
+	pr_debug("Create set %s with family %s\n",
+		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
+#endif
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_HASHSIZE]) {
+		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+		if (hashsize < IPSET_MIMINAL_HASHSIZE)
+			hashsize = IPSET_MIMINAL_HASHSIZE;
+	}
+
+	if (tb[IPSET_ATTR_MAXELEM])
+		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+#ifdef IP_SET_HASH_WITH_NETMASK
+	if (tb[IPSET_ATTR_NETMASK]) {
+		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+
+		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
+		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
+		    netmask == 0)
+			return -IPSET_ERR_INVALID_NETMASK;
+	}
+#endif
+
+	hsize = sizeof(*h);
+#ifdef IP_SET_HASH_WITH_NETS
+	/* The array must hold NETS_LENGTH() entries: that is what the cidr
+	 * book-keeping functions index and what mtype_flush() clears (one
+	 * more than the host mask with IP_SET_HASH_WITH_MULTI).
+	 */
+	hsize += sizeof(struct net_prefixes) * NETS_LENGTH(set->family);
+#endif
+	h = kzalloc(hsize, GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	h->maxelem = maxelem;
+#ifdef IP_SET_HASH_WITH_NETMASK
+	h->netmask = netmask;
+#endif
+	get_random_bytes(&h->initval, sizeof(h->initval));
+	h->timeout = IPSET_NO_TIMEOUT;
+
+	hbits = htable_bits(hashsize);
+	hsize = htable_size(hbits);
+	if (hsize == 0) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table = ip_set_alloc(hsize);
+	if (!h->table) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table->htable_bits = hbits;
+
+	set->data = h;
+	if (set->family == NFPROTO_IPV4)
+		set->variant = &TOKEN(HTYPE, 4_variant);
+	else
+		set->variant = &TOKEN(HTYPE, 6_variant);
+
+	/* Element layout: plain, with counter, with timeout or with both */
+	if (tb[IPSET_ATTR_CADT_FLAGS])
+		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
+		set->extensions |= IPSET_EXT_COUNTER;
+		if (tb[IPSET_ATTR_TIMEOUT]) {
+			h->timeout =
+				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+			set->extensions |= IPSET_EXT_TIMEOUT;
+			if (set->family == NFPROTO_IPV4) {
+				h->dsize =
+					sizeof(struct TOKEN(HTYPE, 4ct_elem));
+				h->offset[IPSET_OFFSET_TIMEOUT] =
+					offsetof(struct TOKEN(HTYPE, 4ct_elem),
+						 timeout);
+				h->offset[IPSET_OFFSET_COUNTER] =
+					offsetof(struct TOKEN(HTYPE, 4ct_elem),
+						 counter);
+				TOKEN(HTYPE, 4_gc_init)(set,
+					TOKEN(HTYPE, 4_gc));
+			} else {
+				h->dsize =
+					sizeof(struct TOKEN(HTYPE, 6ct_elem));
+				h->offset[IPSET_OFFSET_TIMEOUT] =
+					offsetof(struct TOKEN(HTYPE, 6ct_elem),
+						 timeout);
+				h->offset[IPSET_OFFSET_COUNTER] =
+					offsetof(struct TOKEN(HTYPE, 6ct_elem),
+						 counter);
+				TOKEN(HTYPE, 6_gc_init)(set,
+					TOKEN(HTYPE, 6_gc));
+			}
+		} else {
+			if (set->family == NFPROTO_IPV4) {
+				h->dsize =
+					sizeof(struct TOKEN(HTYPE, 4c_elem));
+				h->offset[IPSET_OFFSET_COUNTER] =
+					offsetof(struct TOKEN(HTYPE, 4c_elem),
+						 counter);
+			} else {
+				h->dsize =
+					sizeof(struct TOKEN(HTYPE, 6c_elem));
+				h->offset[IPSET_OFFSET_COUNTER] =
+					offsetof(struct TOKEN(HTYPE, 6c_elem),
+						 counter);
+			}
+		}
+	} else if (tb[IPSET_ATTR_TIMEOUT]) {
+		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->extensions |= IPSET_EXT_TIMEOUT;
+		if (set->family == NFPROTO_IPV4) {
+			h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem));
+			h->offset[IPSET_OFFSET_TIMEOUT] =
+				offsetof(struct TOKEN(HTYPE, 4t_elem),
+					 timeout);
+			TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc));
+		} else {
+			h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem));
+			h->offset[IPSET_OFFSET_TIMEOUT] =
+				offsetof(struct TOKEN(HTYPE, 6t_elem),
+					 timeout);
+			TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc));
+		}
+	} else {
+		if (set->family == NFPROTO_IPV4)
+			h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem));
+		else
+			h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem));
+	}
+
+	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+		 set->name, jhash_size(h->table->htable_bits),
+		 h->table->htable_bits, h->maxelem, set->data, h->table);
+
+	return 0;
+}
+#endif /* IP_SET_EMIT_CREATE */
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index b7d4cb475ae6..c74e6e14cd93 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -21,11 +21,10 @@
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
#define REVISION_MIN 0
-#define REVISION_MAX 0
+#define REVISION_MAX 1 /* Counters support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -33,58 +32,47 @@ IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip");
/* Type specific function prefix */
-#define TYPE hash_ip
-
-static bool
-hash_ip_same_set(const struct ip_set *a, const struct ip_set *b);
-
-#define hash_ip4_same_set hash_ip_same_set
-#define hash_ip6_same_set hash_ip_same_set
+#define HTYPE hash_ip
+#define IP_SET_HASH_WITH_NETMASK
-/* The type variant functions: IPv4 */
+/* IPv4 variants */
-/* Member elements without timeout */
+/* Member elements */
struct hash_ip4_elem {
+ /* Zero valued IP addresses cannot be stored */
__be32 ip;
};
-/* Member elements with timeout support */
-struct hash_ip4_telem {
+struct hash_ip4t_elem {
__be32 ip;
unsigned long timeout;
};
-static inline bool
-hash_ip4_data_equal(const struct hash_ip4_elem *ip1,
- const struct hash_ip4_elem *ip2,
- u32 *multi)
-{
- return ip1->ip == ip2->ip;
-}
+struct hash_ip4c_elem {
+ __be32 ip;
+ struct ip_set_counter counter;
+};
-static inline bool
-hash_ip4_data_isnull(const struct hash_ip4_elem *elem)
-{
- return elem->ip == 0;
-}
+struct hash_ip4ct_elem {
+ __be32 ip;
+ struct ip_set_counter counter;
+ unsigned long timeout;
+};
-static inline void
-hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src)
-{
- dst->ip = src->ip;
-}
+/* Common functions */
-/* Zero valued IP addresses cannot be stored */
-static inline void
-hash_ip4_data_zero_out(struct hash_ip4_elem *elem)
+static inline bool
+hash_ip4_data_equal(const struct hash_ip4_elem *e1,
+ const struct hash_ip4_elem *e2,
+ u32 *multi)
{
- elem->ip = 0;
+ return e1->ip == e2->ip;
}
static inline bool
-hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data)
+hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
{
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip))
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))
goto nla_put_failure;
return 0;
@@ -92,41 +80,26 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data)
+static inline void
+hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
{
- const struct hash_ip4_telem *tdata =
- (const struct hash_ip4_telem *)data;
-
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(tdata->timeout))))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return 1;
+ next->ip = e->ip;
}
-#define IP_SET_HASH_WITH_NETMASK
+#define MTYPE hash_ip4
#define PF 4
#define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d)
-{
- h->next.ip = d->ip;
-}
+#include "ip_set_hash_gen.h"
static int
hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_ip4_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
__be32 ip;
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
@@ -134,43 +107,42 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
if (ip == 0)
return -EINVAL;
- return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags);
+ e.ip = ip;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- u32 ip, ip_to, hosts, timeout = h->timeout;
- __be32 nip;
+ struct hash_ip4_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ u32 ip, ip_to, hosts;
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
ip &= ip_set_hostmask(h->netmask);
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
if (adt == IPSET_TEST) {
- nip = htonl(ip);
- if (nip == 0)
+ e.ip = htonl(ip);
+ if (e.ip == 0)
return -IPSET_ERR_HASH_ELEM;
- return adtfn(set, &nip, timeout, flags);
+ return adtfn(set, &e, &ext, &ext, flags);
}
ip_to = ip;
@@ -193,10 +165,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
for (; !before(ip_to, ip); ip += hosts) {
- nip = htonl(ip);
- if (nip == 0)
+ e.ip = htonl(ip);
+ if (e.ip == 0)
return -IPSET_ERR_HASH_ELEM;
- ret = adtfn(set, &nip, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -206,29 +178,31 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static bool
-hash_ip_same_set(const struct ip_set *a, const struct ip_set *b)
-{
- const struct ip_set_hash *x = a->data;
- const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
- /* Resizing changes htable_bits, so we ignore it */
- return x->maxelem == y->maxelem &&
- x->timeout == y->timeout &&
- x->netmask == y->netmask;
-}
+/* Member elements */
+struct hash_ip6_elem {
+ union nf_inet_addr ip;
+};
-/* The type variant functions: IPv6 */
+struct hash_ip6t_elem {
+ union nf_inet_addr ip;
+ unsigned long timeout;
+};
-struct hash_ip6_elem {
+struct hash_ip6c_elem {
union nf_inet_addr ip;
+ struct ip_set_counter counter;
};
-struct hash_ip6_telem {
+struct hash_ip6ct_elem {
union nf_inet_addr ip;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
const struct hash_ip6_elem *ip2,
@@ -237,37 +211,16 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
}
-static inline bool
-hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
-{
- return ipv6_addr_any(&elem->ip.in6);
-}
-
static inline void
-hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
+hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
{
- dst->ip.in6 = src->ip.in6;
-}
-
-static inline void
-hash_ip6_data_zero_out(struct hash_ip6_elem *elem)
-{
- ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0);
-}
-
-static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
-{
- ip->ip6[0] &= ip_set_netmask6(prefix)[0];
- ip->ip6[1] &= ip_set_netmask6(prefix)[1];
- ip->ip6[2] &= ip_set_netmask6(prefix)[2];
- ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+ ip6_netmask(ip, prefix);
}
static bool
-hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data)
+hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
{
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6))
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6))
goto nla_put_failure;
return 0;
@@ -275,69 +228,55 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data)
+static inline void
+hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)
{
- const struct hash_ip6_telem *e =
- (const struct hash_ip6_telem *)data;
-
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(e->timeout))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
}
+#undef MTYPE
#undef PF
#undef HOST_MASK
+#undef HKEY_DATALEN
+#define MTYPE hash_ip6
#define PF 6
#define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-static inline void
-hash_ip6_data_next(struct ip_set_hash *h, const struct hash_ip6_elem *d)
-{
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
static int
hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- union nf_inet_addr ip;
+ struct hash_ip6_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
- ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip.in6);
- ip6_netmask(&ip, h->netmask);
- if (ipv6_addr_any(&ip.in6))
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ hash_ip6_netmask(&e.ip, h->netmask);
+ if (ipv6_addr_any(&e.ip.in6))
return -EINVAL;
- return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
-static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
- [IPSET_ATTR_IP] = { .type = NLA_NESTED },
- [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
- [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
-};
-
static int
hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- union nf_inet_addr ip;
- u32 timeout = h->timeout;
+ struct hash_ip6_elem e = {};
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -345,110 +284,20 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- ip6_netmask(&ip, h->netmask);
- if (ipv6_addr_any(&ip.in6))
+ hash_ip6_netmask(&e.ip, h->netmask);
+ if (ipv6_addr_any(&e.ip.in6))
return -IPSET_ERR_HASH_ELEM;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
- ret = adtfn(set, &ip, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
-/* Create hash:ip type of sets */
-
-static int
-hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
- u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- u8 netmask, hbits;
- size_t hsize;
- struct ip_set_hash *h;
-
- if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
- return -IPSET_ERR_INVALID_FAMILY;
- netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
- pr_debug("Create set %s with family %s\n",
- set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
-
- if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
- return -IPSET_ERR_PROTOCOL;
-
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
- }
-
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
- if (tb[IPSET_ATTR_NETMASK]) {
- netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
-
- if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
- (set->family == NFPROTO_IPV6 && netmask > 128) ||
- netmask == 0)
- return -IPSET_ERR_INVALID_NETMASK;
- }
-
- h = kzalloc(sizeof(*h), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- h->maxelem = maxelem;
- h->netmask = netmask;
- get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
-
- hbits = htable_bits(hashsize);
- hsize = htable_size(hbits);
- if (hsize == 0) {
- kfree(h);
- return -ENOMEM;
- }
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
- kfree(h);
- return -ENOMEM;
- }
- h->table->htable_bits = hbits;
-
- set->data = h;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ip4_tvariant : &hash_ip6_tvariant;
-
- if (set->family == NFPROTO_IPV4)
- hash_ip4_gc_init(set);
- else
- hash_ip6_gc_init(set);
- } else {
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ip4_variant : &hash_ip6_variant;
- }
-
- pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
-
- return 0;
-}
-
static struct ip_set_type hash_ip_type __read_mostly = {
.name = "hash:ip",
.protocol = IPSET_PROTOCOL,
@@ -465,6 +314,7 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -472,6 +322,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index d8f77bacae86..7a2d2bd98d04 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,12 @@
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
#define REVISION_MIN 0
-#define REVISION_MAX 1 /* SCTP and UDPLITE support added */
+/* 1 SCTP and UDPLITE support added */
+#define REVISION_MAX 2 /* Counters support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -34,33 +34,45 @@ IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip,port");
/* Type specific function prefix */
-#define TYPE hash_ipport
+#define HTYPE hash_ipport
-static bool
-hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b);
+/* IPv4 variants */
-#define hash_ipport4_same_set hash_ipport_same_set
-#define hash_ipport6_same_set hash_ipport_same_set
+/* Member elements */
+struct hash_ipport4_elem {
+ __be32 ip;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
-/* The type variant functions: IPv4 */
+struct hash_ipport4t_elem {
+ __be32 ip;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+ unsigned long timeout;
+};
-/* Member elements without timeout */
-struct hash_ipport4_elem {
+struct hash_ipport4c_elem {
__be32 ip;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
};
-/* Member elements with timeout support */
-struct hash_ipport4_telem {
+struct hash_ipport4ct_elem {
__be32 ip;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
const struct hash_ipport4_elem *ip2,
@@ -71,27 +83,6 @@ hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline bool
-hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_ipport4_data_copy(struct hash_ipport4_elem *dst,
- const struct hash_ipport4_elem *src)
-{
- dst->ip = src->ip;
- dst->port = src->port;
- dst->proto = src->proto;
-}
-
-static inline void
-hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem)
-{
- elem->proto = 0;
-}
-
static bool
hash_ipport4_data_list(struct sk_buff *skb,
const struct hash_ipport4_elem *data)
@@ -106,111 +97,91 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ipport4_data_tlist(struct sk_buff *skb,
- const struct hash_ipport4_elem *data)
-{
- const struct hash_ipport4_telem *tdata =
- (const struct hash_ipport4_telem *)data;
-
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(tdata->timeout))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
-}
-
-#define PF 4
-#define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
static inline void
-hash_ipport4_data_next(struct ip_set_hash *h,
+hash_ipport4_data_next(struct hash_ipport4_elem *next,
const struct hash_ipport4_elem *d)
{
- h->next.ip = d->ip;
- h->next.port = d->port;
+ next->ip = d->ip;
+ next->port = d->port;
}
+#define MTYPE hash_ipport4
+#define PF 4
+#define HOST_MASK 32
+#define HKEY_DATALEN sizeof(struct hash_ipport4_elem)
+#include "ip_set_hash_gen.h"
+
static int
hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport4_elem data = { };
+ struct hash_ipport4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
-
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport4_elem data = { };
+ struct hash_ipport4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 ip, ip_to, p = 0, port, port_to;
- u32 timeout = h->timeout;
bool with_ports = false;
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMP))
- data.port = 0;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
tb[IPSET_ATTR_PORT_TO])) {
- ret = adtfn(set, &data, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
- ip_to = ip = ntohl(data.ip);
+ ip_to = ip = ntohl(e.ip);
if (tb[IPSET_ATTR_IP_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
@@ -225,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
- port_to = port = ntohs(data.port);
+ port_to = port = ntohs(e.port);
if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port > port_to)
@@ -238,9 +209,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
- data.ip = htonl(ip);
- data.port = htons(p);
- ret = adtfn(set, &data, timeout, flags);
+ e.ip = htonl(ip);
+ e.port = htons(p);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -251,34 +222,42 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static bool
-hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b)
-{
- const struct ip_set_hash *x = a->data;
- const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
- /* Resizing changes htable_bits, so we ignore it */
- return x->maxelem == y->maxelem &&
- x->timeout == y->timeout;
-}
+struct hash_ipport6_elem {
+ union nf_inet_addr ip;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
-/* The type variant functions: IPv6 */
+struct hash_ipport6t_elem {
+ union nf_inet_addr ip;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+ unsigned long timeout;
+};
-struct hash_ipport6_elem {
+struct hash_ipport6c_elem {
union nf_inet_addr ip;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
};
-struct hash_ipport6_telem {
+struct hash_ipport6ct_elem {
union nf_inet_addr ip;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
const struct hash_ipport6_elem *ip2,
@@ -289,25 +268,6 @@ hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline bool
-hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_ipport6_data_copy(struct hash_ipport6_elem *dst,
- const struct hash_ipport6_elem *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem)
-{
- elem->proto = 0;
-}
-
static bool
hash_ipport6_data_list(struct sk_buff *skb,
const struct hash_ipport6_elem *data)
@@ -322,66 +282,52 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ipport6_data_tlist(struct sk_buff *skb,
- const struct hash_ipport6_elem *data)
+static inline void
+hash_ipport6_data_next(struct hash_ipport4_elem *next,
+ const struct hash_ipport6_elem *d)
{
- const struct hash_ipport6_telem *e =
- (const struct hash_ipport6_telem *)data;
-
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(e->timeout))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->port = d->port;
}
+#undef MTYPE
#undef PF
#undef HOST_MASK
+#undef HKEY_DATALEN
+#define MTYPE hash_ipport6
#define PF 6
#define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipport6_data_next(struct ip_set_hash *h,
- const struct hash_ipport6_elem *d)
-{
- h->next.port = d->port;
-}
+#define HKEY_DATALEN sizeof(struct hash_ipport6_elem)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
static int
hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport6_elem data = { };
+ struct hash_ipport6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
-
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport6_elem data = { };
+ struct hash_ipport6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 port, port_to;
- u32 timeout = h->timeout;
bool with_ports = false;
int ret;
@@ -389,6 +335,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -396,39 +344,34 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMPV6))
- data.port = 0;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
- ret = adtfn(set, &data, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
- port = ntohs(data.port);
+ port = ntohs(e.port);
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port > port_to)
swap(port, port_to);
@@ -436,8 +379,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
port = ntohs(h->next.port);
for (; port <= port_to; port++) {
- data.port = htons(port);
- ret = adtfn(set, &data, timeout, flags);
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -447,78 +390,6 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* Create hash:ip type of sets */
-
-static int
-hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
- struct ip_set_hash *h;
- u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- u8 hbits;
- size_t hsize;
-
- if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
- return -IPSET_ERR_INVALID_FAMILY;
-
- if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
- return -IPSET_ERR_PROTOCOL;
-
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
- }
-
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
- h = kzalloc(sizeof(*h), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- h->maxelem = maxelem;
- get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
-
- hbits = htable_bits(hashsize);
- hsize = htable_size(hbits);
- if (hsize == 0) {
- kfree(h);
- return -ENOMEM;
- }
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
- kfree(h);
- return -ENOMEM;
- }
- h->table->htable_bits = hbits;
-
- set->data = h;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ipport4_tvariant : &hash_ipport6_tvariant;
-
- if (set->family == NFPROTO_IPV4)
- hash_ipport4_gc_init(set);
- else
- hash_ipport6_gc_init(set);
- } else {
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ipport4_variant : &hash_ipport6_variant;
- }
-
- pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
-
- return 0;
-}
-
static struct ip_set_type hash_ipport_type __read_mostly = {
.name = "hash:ip,port",
.protocol = IPSET_PROTOCOL,
@@ -535,6 +406,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -545,6 +417,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 1da1e955f38b..34e8a1acce42 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,12 @@
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
#define REVISION_MIN 0
-#define REVISION_MAX 1 /* SCTP and UDPLITE support added */
+/* 1 SCTP and UDPLITE support added */
+#define REVISION_MAX 2 /* Counters support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -34,32 +34,44 @@ IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,ip");
/* Type specific function prefix */
-#define TYPE hash_ipportip
+#define HTYPE hash_ipportip
-static bool
-hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b);
+/* IPv4 variants */
-#define hash_ipportip4_same_set hash_ipportip_same_set
-#define hash_ipportip6_same_set hash_ipportip_same_set
+/* Member elements */
+struct hash_ipportip4_elem {
+ __be32 ip;
+ __be32 ip2;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
-/* The type variant functions: IPv4 */
+struct hash_ipportip4t_elem {
+ __be32 ip;
+ __be32 ip2;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+ unsigned long timeout;
+};
-/* Member elements without timeout */
-struct hash_ipportip4_elem {
+struct hash_ipportip4c_elem {
__be32 ip;
__be32 ip2;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
};
-/* Member elements with timeout support */
-struct hash_ipportip4_telem {
+struct hash_ipportip4ct_elem {
__be32 ip;
__be32 ip2;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
unsigned long timeout;
};
@@ -74,25 +86,6 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline bool
-hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst,
- const struct hash_ipportip4_elem *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem)
-{
- elem->proto = 0;
-}
-
static bool
hash_ipportip4_data_list(struct sk_buff *skb,
const struct hash_ipportip4_elem *data)
@@ -108,117 +101,96 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ipportip4_data_tlist(struct sk_buff *skb,
- const struct hash_ipportip4_elem *data)
+static inline void
+hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
+ const struct hash_ipportip4_elem *d)
{
- const struct hash_ipportip4_telem *tdata =
- (const struct hash_ipportip4_telem *)data;
-
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
- nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(tdata->timeout))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->ip = d->ip;
+ next->port = d->port;
}
+/* Common functions */
+#define MTYPE hash_ipportip4
#define PF 4
#define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipportip4_data_next(struct ip_set_hash *h,
- const struct hash_ipportip4_elem *d)
-{
- h->next.ip = d->ip;
- h->next.port = d->port;
-}
+#include "ip_set_hash_gen.h"
static int
hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip4_elem data = { };
+ struct hash_ipportip4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
- ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2);
-
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip4_elem data = { };
+ struct hash_ipportip4_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 ip, ip_to, p = 0, port, port_to;
- u32 timeout = h->timeout;
bool with_ports = false;
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2);
if (ret)
return ret;
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMP))
- data.port = 0;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
tb[IPSET_ATTR_PORT_TO])) {
- ret = adtfn(set, &data, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
- ip_to = ip = ntohl(data.ip);
+ ip_to = ip = ntohl(e.ip);
if (tb[IPSET_ATTR_IP_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
@@ -233,7 +205,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
- port_to = port = ntohs(data.port);
+ port_to = port = ntohs(e.port);
if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port > port_to)
@@ -246,9 +218,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
- data.ip = htonl(ip);
- data.port = htons(p);
- ret = adtfn(set, &data, timeout, flags);
+ e.ip = htonl(ip);
+ e.port = htons(p);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -259,36 +231,46 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static bool
-hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b)
-{
- const struct ip_set_hash *x = a->data;
- const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
- /* Resizing changes htable_bits, so we ignore it */
- return x->maxelem == y->maxelem &&
- x->timeout == y->timeout;
-}
+struct hash_ipportip6_elem {
+ union nf_inet_addr ip;
+ union nf_inet_addr ip2;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+};
-/* The type variant functions: IPv6 */
+struct hash_ipportip6t_elem {
+ union nf_inet_addr ip;
+ union nf_inet_addr ip2;
+ __be16 port;
+ u8 proto;
+ u8 padding;
+ unsigned long timeout;
+};
-struct hash_ipportip6_elem {
+struct hash_ipportip6c_elem {
union nf_inet_addr ip;
union nf_inet_addr ip2;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
};
-struct hash_ipportip6_telem {
+struct hash_ipportip6ct_elem {
union nf_inet_addr ip;
union nf_inet_addr ip2;
__be16 port;
u8 proto;
u8 padding;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
const struct hash_ipportip6_elem *ip2,
@@ -300,25 +282,6 @@ hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline bool
-hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst,
- const struct hash_ipportip6_elem *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem)
-{
- elem->proto = 0;
-}
-
static bool
hash_ipportip6_data_list(struct sk_buff *skb,
const struct hash_ipportip6_elem *data)
@@ -334,68 +297,51 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ipportip6_data_tlist(struct sk_buff *skb,
- const struct hash_ipportip6_elem *data)
+static inline void
+hash_ipportip6_data_next(struct hash_ipportip4_elem *next,
+ const struct hash_ipportip6_elem *d)
{
- const struct hash_ipportip6_telem *e =
- (const struct hash_ipportip6_telem *)data;
-
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
- nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(e->timeout))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->port = d->port;
}
+#undef MTYPE
#undef PF
#undef HOST_MASK
+#define MTYPE hash_ipportip6
#define PF 6
#define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipportip6_data_next(struct ip_set_hash *h,
- const struct hash_ipportip6_elem *d)
-{
- h->next.port = d->port;
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
static int
hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip6_elem data = { };
+ struct hash_ipportip6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
- ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
-
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip6_elem data = { };
+ struct hash_ipportip6_elem e = { };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 port, port_to;
- u32 timeout = h->timeout;
bool with_ports = false;
int ret;
@@ -403,6 +349,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -410,43 +358,38 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);
if (ret)
return ret;
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMPV6))
- data.port = 0;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
- ret = adtfn(set, &data, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
- port = ntohs(data.port);
+ port = ntohs(e.port);
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port > port_to)
swap(port, port_to);
@@ -454,8 +397,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
port = ntohs(h->next.port);
for (; port <= port_to; port++) {
- data.port = htons(port);
- ret = adtfn(set, &data, timeout, flags);
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -465,78 +408,6 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* Create hash:ip type of sets */
-
-static int
-hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
- struct ip_set_hash *h;
- u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- u8 hbits;
- size_t hsize;
-
- if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
- return -IPSET_ERR_INVALID_FAMILY;
-
- if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
- return -IPSET_ERR_PROTOCOL;
-
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
- }
-
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
- h = kzalloc(sizeof(*h), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- h->maxelem = maxelem;
- get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
-
- hbits = htable_bits(hashsize);
- hsize = htable_size(hbits);
- if (hsize == 0) {
- kfree(h);
- return -ENOMEM;
- }
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
- kfree(h);
- return -ENOMEM;
- }
- h->table->htable_bits = hbits;
-
- set->data = h;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant;
-
- if (set->family == NFPROTO_IPV4)
- hash_ipportip4_gc_init(set);
- else
- hash_ipportip6_gc_init(set);
- } else {
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ipportip4_variant : &hash_ipportip6_variant;
- }
-
- pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
-
- return 0;
-}
-
static struct ip_set_type hash_ipportip_type __read_mostly = {
.name = "hash:ip,port,ip",
.protocol = IPSET_PROTOCOL,
@@ -552,6 +423,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_PROBES] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -563,6 +435,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 10a30b4fc7db..c6a525373be4 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -21,14 +21,14 @@
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
#define REVISION_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Range as input support for IPv4 added */
-#define REVISION_MAX 3 /* nomatch flag support added */
+/* 3 nomatch flag support added */
+#define REVISION_MAX 4 /* Counters support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -36,23 +36,19 @@ IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,net");
/* Type specific function prefix */
-#define TYPE hash_ipportnet
-
-static bool
-hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b);
-
-#define hash_ipportnet4_same_set hash_ipportnet_same_set
-#define hash_ipportnet6_same_set hash_ipportnet_same_set
-
-/* The type variant functions: IPv4 */
+#define HTYPE hash_ipportnet
/* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0
* However this way we have to store internally cidr - 1,
* dancing back and forth.
*/
#define IP_SET_HASH_WITH_NETS_PACKED
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+
+/* IPv4 variants */
-/* Member elements without timeout */
+/* Member elements */
struct hash_ipportnet4_elem {
__be32 ip;
__be32 ip2;
@@ -62,8 +58,7 @@ struct hash_ipportnet4_elem {
u8 proto;
};
-/* Member elements with timeout support */
-struct hash_ipportnet4_telem {
+struct hash_ipportnet4t_elem {
__be32 ip;
__be32 ip2;
__be16 port;
@@ -73,6 +68,29 @@ struct hash_ipportnet4_telem {
unsigned long timeout;
};
+struct hash_ipportnet4c_elem {
+ __be32 ip;
+ __be32 ip2;
+ __be16 port;
+ u8 cidr:7;
+ u8 nomatch:1;
+ u8 proto;
+ struct ip_set_counter counter;
+};
+
+struct hash_ipportnet4ct_elem {
+ __be32 ip;
+ __be32 ip2;
+ __be16 port;
+ u8 cidr:7;
+ u8 nomatch:1;
+ u8 proto;
+ struct ip_set_counter counter;
+ unsigned long timeout;
+};
+
+/* Common functions */
+
static inline bool
hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
const struct hash_ipportnet4_elem *ip2,
@@ -85,38 +103,22 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline bool
-hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst,
- const struct hash_ipportnet4_elem *src)
+static inline int
+hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
{
- memcpy(dst, src, sizeof(*dst));
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags)
+hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
-hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *dst, u32 *flags)
+hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
-static inline int
-hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem)
-{
- return elem->nomatch ? -ENOTEMPTY : 1;
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -126,12 +128,6 @@ hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
elem->cidr = cidr - 1;
}
-static inline void
-hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem)
-{
- elem->proto = 0;
-}
-
static bool
hash_ipportnet4_data_list(struct sk_buff *skb,
const struct hash_ipportnet4_elem *data)
@@ -152,81 +148,56 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ipportnet4_data_tlist(struct sk_buff *skb,
- const struct hash_ipportnet4_elem *data)
+static inline void
+hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
+ const struct hash_ipportnet4_elem *d)
{
- const struct hash_ipportnet4_telem *tdata =
- (const struct hash_ipportnet4_telem *)data;
- u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
- nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(tdata->timeout))) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->ip = d->ip;
+ next->port = d->port;
+ next->ip2 = d->ip2;
}
-#define IP_SET_HASH_WITH_PROTO
-#define IP_SET_HASH_WITH_NETS
-
+#define MTYPE hash_ipportnet4
#define PF 4
#define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipportnet4_data_next(struct ip_set_hash *h,
- const struct hash_ipportnet4_elem *d)
-{
- h->next.ip = d->ip;
- h->next.port = d->port;
- h->next.ip2 = d->ip2;
-}
+#include "ip_set_hash_gen.h"
static int
hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportnet4_elem data = {
+ struct hash_ipportnet4_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK - 1;
+ e.cidr = HOST_MASK - 1;
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
- ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2);
- data.ip2 &= ip_set_netmask(data.cidr + 1);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2);
+ e.ip2 &= ip_set_netmask(e.cidr + 1);
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportnet4_elem data = { .cidr = HOST_MASK - 1 };
+ struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 ip, ip_to, p = 0, port, port_to;
u32 ip2_from, ip2_to, ip2_last, ip2;
- u32 timeout = h->timeout;
bool with_ports = false;
u8 cidr;
int ret;
@@ -235,13 +206,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -253,46 +227,41 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- data.cidr = cidr - 1;
+ e.cidr = cidr - 1;
}
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMP))
- data.port = 0;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
- if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_NOMATCH)
- flags |= (cadt_flags << 16);
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports ||
tb[IPSET_ATTR_IP2_TO])) {
- data.ip = htonl(ip);
- data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr + 1));
- ret = adtfn(set, &data, timeout, flags);
- return ip_set_eexist(ret, flags) ? 0 : ret;
+ e.ip = htonl(ip);
+ e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
ip_to = ip;
@@ -310,7 +279,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
- port_to = port = ntohs(data.port);
+ port_to = port = ntohs(e.port);
if (tb[IPSET_ATTR_PORT_TO]) {
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port > port_to)
@@ -326,28 +295,27 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (ip2_from + UINT_MAX == ip2_to)
return -IPSET_ERR_HASH_RANGE;
- } else {
- ip_set_mask_from_to(ip2_from, ip2_to, data.cidr + 1);
- }
+ } else
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
if (retried)
ip = ntohl(h->next.ip);
for (; !before(ip_to, ip); ip++) {
- data.ip = htonl(ip);
+ e.ip = htonl(ip);
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
- data.port = htons(p);
+ e.port = htons(p);
ip2 = retried
&& ip == ntohl(h->next.ip)
&& p == ntohs(h->next.port)
? ntohl(h->next.ip2) : ip2_from;
while (!after(ip2, ip2_to)) {
- data.ip2 = htonl(ip2);
+ e.ip2 = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
&cidr);
- data.cidr = cidr - 1;
- ret = adtfn(set, &data, timeout, flags);
+ e.cidr = cidr - 1;
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -360,38 +328,50 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static bool
-hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b)
-{
- const struct ip_set_hash *x = a->data;
- const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
- /* Resizing changes htable_bits, so we ignore it */
- return x->maxelem == y->maxelem &&
- x->timeout == y->timeout;
-}
+struct hash_ipportnet6_elem {
+ union nf_inet_addr ip;
+ union nf_inet_addr ip2;
+ __be16 port;
+ u8 cidr:7;
+ u8 nomatch:1;
+ u8 proto;
+};
-/* The type variant functions: IPv6 */
+struct hash_ipportnet6t_elem {
+ union nf_inet_addr ip;
+ union nf_inet_addr ip2;
+ __be16 port;
+ u8 cidr:7;
+ u8 nomatch:1;
+ u8 proto;
+ unsigned long timeout;
+};
-struct hash_ipportnet6_elem {
+struct hash_ipportnet6c_elem {
union nf_inet_addr ip;
union nf_inet_addr ip2;
__be16 port;
u8 cidr:7;
u8 nomatch:1;
u8 proto;
+ struct ip_set_counter counter;
};
-struct hash_ipportnet6_telem {
+struct hash_ipportnet6ct_elem {
union nf_inet_addr ip;
union nf_inet_addr ip2;
__be16 port;
u8 cidr:7;
u8 nomatch:1;
u8 proto;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
const struct hash_ipportnet6_elem *ip2,
@@ -404,53 +384,22 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline bool
-hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst,
- const struct hash_ipportnet6_elem *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags)
-{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
-static inline void
-hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *dst, u32 *flags)
-{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
static inline int
-hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem)
+hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem)
+hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
{
- elem->proto = 0;
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
{
- ip->ip6[0] &= ip_set_netmask6(prefix)[0];
- ip->ip6[1] &= ip_set_netmask6(prefix)[1];
- ip->ip6[2] &= ip_set_netmask6(prefix)[2];
- ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -480,78 +429,58 @@ nla_put_failure:
return 1;
}
-static bool
-hash_ipportnet6_data_tlist(struct sk_buff *skb,
- const struct hash_ipportnet6_elem *data)
+static inline void
+hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next,
+ const struct hash_ipportnet6_elem *d)
{
- const struct hash_ipportnet6_telem *e =
- (const struct hash_ipportnet6_telem *)data;
- u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
- nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(e->timeout))) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->port = d->port;
}
+#undef MTYPE
#undef PF
#undef HOST_MASK
+#define MTYPE hash_ipportnet6
#define PF 6
#define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipportnet6_data_next(struct ip_set_hash *h,
- const struct hash_ipportnet6_elem *d)
-{
- h->next.port = d->port;
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
static int
hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportnet6_elem data = {
+ struct hash_ipportnet6_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK - 1;
+ e.cidr = HOST_MASK - 1;
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
- ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
- ip6_netmask(&data.ip2, data.cidr + 1);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6);
+ ip6_netmask(&e.ip2, e.cidr + 1);
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportnet6_elem data = { .cidr = HOST_MASK - 1 };
+ struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 port, port_to;
- u32 timeout = h->timeout;
bool with_ports = false;
u8 cidr;
int ret;
@@ -561,6 +490,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -570,11 +501,12 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);
if (ret)
return ret;
@@ -582,46 +514,41 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- data.cidr = cidr - 1;
+ e.cidr = cidr - 1;
}
- ip6_netmask(&data.ip2, data.cidr + 1);
+ ip6_netmask(&e.ip2, e.cidr + 1);
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMPV6))
- data.port = 0;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
- if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_NOMATCH)
- flags |= (cadt_flags << 16);
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
- ret = adtfn(set, &data, timeout, flags);
- return ip_set_eexist(ret, flags) ? 0 : ret;
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
- port = ntohs(data.port);
+ port = ntohs(e.port);
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port > port_to)
swap(port, port_to);
@@ -629,8 +556,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
port = ntohs(h->next.port);
for (; port <= port_to; port++) {
- data.port = htons(port);
- ret = adtfn(set, &data, timeout, flags);
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -640,81 +567,6 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* Create hash:ip type of sets */
-
-static int
-hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
- struct ip_set_hash *h;
- u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- u8 hbits;
- size_t hsize;
-
- if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
- return -IPSET_ERR_INVALID_FAMILY;
-
- if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
- return -IPSET_ERR_PROTOCOL;
-
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
- }
-
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
- h = kzalloc(sizeof(*h)
- + sizeof(struct ip_set_hash_nets)
- * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- h->maxelem = maxelem;
- get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
-
- hbits = htable_bits(hashsize);
- hsize = htable_size(hbits);
- if (hsize == 0) {
- kfree(h);
- return -ENOMEM;
- }
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
- kfree(h);
- return -ENOMEM;
- }
- h->table->htable_bits = hbits;
-
- set->data = h;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ipportnet4_tvariant
- : &hash_ipportnet6_tvariant;
-
- if (set->family == NFPROTO_IPV4)
- hash_ipportnet4_gc_init(set);
- else
- hash_ipportnet6_gc_init(set);
- } else {
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_ipportnet4_variant : &hash_ipportnet6_variant;
- }
-
- pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
-
- return 0;
-}
-
static struct ip_set_type hash_ipportnet_type __read_mostly = {
.name = "hash:ip,port,net",
.protocol = IPSET_PROTOCOL,
@@ -731,6 +583,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_PROBES] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -745,6 +598,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index d6a59154d710..da740ceb56ae 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -20,12 +20,12 @@
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
#define REVISION_MIN 0
/* 1 Range as input support for IPv4 added */
-#define REVISION_MAX 2 /* nomatch flag support added */
+/* 2 nomatch flag support added */
+#define REVISION_MAX 3 /* Counters support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -33,33 +33,46 @@ IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:net");
/* Type specific function prefix */
-#define TYPE hash_net
+#define HTYPE hash_net
+#define IP_SET_HASH_WITH_NETS
-static bool
-hash_net_same_set(const struct ip_set *a, const struct ip_set *b);
+/* IPv4 variants */
-#define hash_net4_same_set hash_net_same_set
-#define hash_net6_same_set hash_net_same_set
+/* Member elements */
+struct hash_net4_elem {
+ __be32 ip;
+ u16 padding0;
+ u8 nomatch;
+ u8 cidr;
+};
-/* The type variant functions: IPv4 */
+struct hash_net4t_elem {
+ __be32 ip;
+ u16 padding0;
+ u8 nomatch;
+ u8 cidr;
+ unsigned long timeout;
+};
-/* Member elements without timeout */
-struct hash_net4_elem {
+struct hash_net4c_elem {
__be32 ip;
u16 padding0;
u8 nomatch;
u8 cidr;
+ struct ip_set_counter counter;
};
-/* Member elements with timeout support */
-struct hash_net4_telem {
+struct hash_net4ct_elem {
__be32 ip;
u16 padding0;
u8 nomatch;
u8 cidr;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_net4_data_equal(const struct hash_net4_elem *ip1,
const struct hash_net4_elem *ip2,
@@ -69,40 +82,22 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline bool
-hash_net4_data_isnull(const struct hash_net4_elem *elem)
-{
- return elem->cidr == 0;
-}
-
-static inline void
-hash_net4_data_copy(struct hash_net4_elem *dst,
- const struct hash_net4_elem *src)
+static inline int
+hash_net4_do_data_match(const struct hash_net4_elem *elem)
{
- dst->ip = src->ip;
- dst->cidr = src->cidr;
- dst->nomatch = src->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags)
+hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
-hash_net4_data_reset_flags(struct hash_net4_elem *dst, u32 *flags)
+hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
-static inline int
-hash_net4_data_match(const struct hash_net4_elem *elem)
-{
- return elem->nomatch ? -ENOTEMPTY : 1;
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -112,13 +107,6 @@ hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
elem->cidr = cidr;
}
-/* Zero CIDR values cannot be stored */
-static inline void
-hash_net4_data_zero_out(struct hash_net4_elem *elem)
-{
- elem->cidr = 0;
-}
-
static bool
hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
{
@@ -135,106 +123,84 @@ nla_put_failure:
return 1;
}
-static bool
-hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data)
+static inline void
+hash_net4_data_next(struct hash_net4_elem *next,
+ const struct hash_net4_elem *d)
{
- const struct hash_net4_telem *tdata =
- (const struct hash_net4_telem *)data;
- u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR, tdata->cidr) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(tdata->timeout))) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->ip = d->ip;
}
-#define IP_SET_HASH_WITH_NETS
-
+#define MTYPE hash_net4
#define PF 4
#define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_net4_data_next(struct ip_set_hash *h,
- const struct hash_net4_elem *d)
-{
- h->next.ip = d->ip;
-}
+#include "ip_set_hash_gen.h"
static int
hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_net4_elem data = {
+ struct hash_net4_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
- if (data.cidr == 0)
+ if (e.cidr == 0)
return -EINVAL;
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK;
+ e.cidr = HOST_MASK;
- ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
- data.ip &= ip_set_netmask(data.cidr);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ e.ip &= ip_set_netmask(e.cidr);
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_net4_elem data = { .cidr = HOST_MASK };
- u32 timeout = h->timeout;
+ struct hash_net4_elem e = { .cidr = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 ip = 0, ip_to, last;
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_CIDR]) {
- data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!data.cidr || data.cidr > HOST_MASK)
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr || e.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
- if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_NOMATCH)
- flags |= (cadt_flags << 16);
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
- data.ip = htonl(ip & ip_set_hostmask(data.cidr));
- ret = adtfn(set, &data, timeout, flags);
- return ip_set_eexist(ret, flags) ? 0 : ret;
+ e.ip = htonl(ip & ip_set_hostmask(e.cidr));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
ip_to = ip;
@@ -250,9 +216,9 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
while (!after(ip, ip_to)) {
- data.ip = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &data.cidr);
- ret = adtfn(set, &data, timeout, flags);
+ e.ip = htonl(ip);
+ last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
else
@@ -262,34 +228,42 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static bool
-hash_net_same_set(const struct ip_set *a, const struct ip_set *b)
-{
- const struct ip_set_hash *x = a->data;
- const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
- /* Resizing changes htable_bits, so we ignore it */
- return x->maxelem == y->maxelem &&
- x->timeout == y->timeout;
-}
+struct hash_net6_elem {
+ union nf_inet_addr ip;
+ u16 padding0;
+ u8 nomatch;
+ u8 cidr;
+};
-/* The type variant functions: IPv6 */
+struct hash_net6t_elem {
+ union nf_inet_addr ip;
+ u16 padding0;
+ u8 nomatch;
+ u8 cidr;
+ unsigned long timeout;
+};
-struct hash_net6_elem {
+struct hash_net6c_elem {
union nf_inet_addr ip;
u16 padding0;
u8 nomatch;
u8 cidr;
+ struct ip_set_counter counter;
};
-struct hash_net6_telem {
+struct hash_net6ct_elem {
union nf_inet_addr ip;
u16 padding0;
u8 nomatch;
u8 cidr;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_net6_data_equal(const struct hash_net6_elem *ip1,
const struct hash_net6_elem *ip2,
@@ -299,55 +273,22 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline bool
-hash_net6_data_isnull(const struct hash_net6_elem *elem)
-{
- return elem->cidr == 0;
-}
-
-static inline void
-hash_net6_data_copy(struct hash_net6_elem *dst,
- const struct hash_net6_elem *src)
-{
- dst->ip.in6 = src->ip.in6;
- dst->cidr = src->cidr;
- dst->nomatch = src->nomatch;
-}
-
-static inline void
-hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags)
-{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
-static inline void
-hash_net6_data_reset_flags(struct hash_net6_elem *dst, u32 *flags)
-{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
static inline int
-hash_net6_data_match(const struct hash_net6_elem *elem)
+hash_net6_do_data_match(const struct hash_net6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_net6_data_zero_out(struct hash_net6_elem *elem)
+hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
{
- elem->cidr = 0;
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
{
- ip->ip6[0] &= ip_set_netmask6(prefix)[0];
- ip->ip6[1] &= ip_set_netmask6(prefix)[1];
- ip->ip6[2] &= ip_set_netmask6(prefix)[2];
- ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -373,74 +314,60 @@ nla_put_failure:
return 1;
}
-static bool
-hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data)
+static inline void
+hash_net6_data_next(struct hash_net4_elem *next,
+ const struct hash_net6_elem *d)
{
- const struct hash_net6_telem *e =
- (const struct hash_net6_telem *)data;
- u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR, e->cidr) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(e->timeout))) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
}
+#undef MTYPE
#undef PF
#undef HOST_MASK
+#define MTYPE hash_net6
#define PF 6
#define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_net6_data_next(struct ip_set_hash *h,
- const struct hash_net6_elem *d)
-{
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
static int
hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_net6_elem data = {
+ struct hash_net6_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
- if (data.cidr == 0)
+ if (e.cidr == 0)
return -EINVAL;
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK;
+ e.cidr = HOST_MASK;
- ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
- ip6_netmask(&data.ip, data.cidr);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6_netmask(&e.ip, e.cidr);
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_net6_elem data = { .cidr = HOST_MASK };
- u32 timeout = h->timeout;
+ struct hash_net6_elem e = { .cidr = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -448,107 +375,29 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_CIDR])
- data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!data.cidr || data.cidr > HOST_MASK)
+ if (!e.cidr || e.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- ip6_netmask(&data.ip, data.cidr);
+ ip6_netmask(&e.ip, e.cidr);
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
- if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_NOMATCH)
- flags |= (cadt_flags << 16);
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
- ret = adtfn(set, &data, timeout, flags);
+ ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_eexist(ret, flags) ? 0 : ret;
-}
-
-/* Create hash:ip type of sets */
-
-static int
-hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
- u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- struct ip_set_hash *h;
- u8 hbits;
- size_t hsize;
-
- if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
- return -IPSET_ERR_INVALID_FAMILY;
-
- if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
- return -IPSET_ERR_PROTOCOL;
-
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
- }
-
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
- h = kzalloc(sizeof(*h)
- + sizeof(struct ip_set_hash_nets)
- * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- h->maxelem = maxelem;
- get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
-
- hbits = htable_bits(hashsize);
- hsize = htable_size(hbits);
- if (hsize == 0) {
- kfree(h);
- return -ENOMEM;
- }
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
- kfree(h);
- return -ENOMEM;
- }
- h->table->htable_bits = hbits;
-
- set->data = h;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_net4_tvariant : &hash_net6_tvariant;
-
- if (set->family == NFPROTO_IPV4)
- hash_net4_gc_init(set);
- else
- hash_net6_gc_init(set);
- } else {
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_net4_variant : &hash_net6_variant;
- }
-
- pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
-
- return 0;
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
static struct ip_set_type hash_net_type __read_mostly = {
@@ -566,6 +415,7 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_PROBES] = { .type = NLA_U8 },
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -573,6 +423,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index f2b0a3c30130..84ae6f6ce624 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,12 @@
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
#define REVISION_MIN 0
/* 1 nomatch flag support added */
-#define REVISION_MAX 2 /* /0 support added */
+/* 2 /0 support added */
+#define REVISION_MAX 3 /* Counters support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -127,17 +127,14 @@ iface_add(struct rb_root *root, const char **iface)
}
/* Type specific function prefix */
-#define TYPE hash_netiface
-
-static bool
-hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b);
-
-#define hash_netiface4_same_set hash_netiface_same_set
-#define hash_netiface6_same_set hash_netiface_same_set
+#define HTYPE hash_netiface
+#define IP_SET_HASH_WITH_NETS
+#define IP_SET_HASH_WITH_RBTREE
+#define IP_SET_HASH_WITH_MULTI
#define STREQ(a, b) (strcmp(a, b) == 0)
-/* The type variant functions: IPv4 */
+/* IPv4 variants */
struct hash_netiface4_elem_hashed {
__be32 ip;
@@ -147,8 +144,6 @@ struct hash_netiface4_elem_hashed {
u8 elem;
};
-#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
-
/* Member elements without timeout */
struct hash_netiface4_elem {
__be32 ip;
@@ -159,17 +154,39 @@ struct hash_netiface4_elem {
const char *iface;
};
-/* Member elements with timeout support */
-struct hash_netiface4_telem {
+struct hash_netiface4t_elem {
+ __be32 ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+ const char *iface;
+ unsigned long timeout;
+};
+
+struct hash_netiface4c_elem {
+ __be32 ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+ const char *iface;
+ struct ip_set_counter counter;
+};
+
+struct hash_netiface4ct_elem {
__be32 ip;
u8 physdev;
u8 cidr;
u8 nomatch;
u8 elem;
const char *iface;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
const struct hash_netiface4_elem *ip2,
@@ -182,38 +199,22 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
ip1->iface == ip2->iface;
}
-static inline bool
-hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem)
-{
- return elem->elem == 0;
-}
-
-static inline void
-hash_netiface4_data_copy(struct hash_netiface4_elem *dst,
- const struct hash_netiface4_elem *src)
+static inline int
+hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
{
- memcpy(dst, src, sizeof(*dst));
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags)
+hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
-hash_netiface4_data_reset_flags(struct hash_netiface4_elem *dst, u32 *flags)
-{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
-static inline int
-hash_netiface4_data_match(const struct hash_netiface4_elem *elem)
+hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
{
- return elem->nomatch ? -ENOTEMPTY : 1;
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -223,12 +224,6 @@ hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
elem->cidr = cidr;
}
-static inline void
-hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem)
-{
- elem->elem = 0;
-}
-
static bool
hash_netiface4_data_list(struct sk_buff *skb,
const struct hash_netiface4_elem *data)
@@ -249,66 +244,40 @@ nla_put_failure:
return 1;
}
-static bool
-hash_netiface4_data_tlist(struct sk_buff *skb,
- const struct hash_netiface4_elem *data)
+static inline void
+hash_netiface4_data_next(struct hash_netiface4_elem *next,
+ const struct hash_netiface4_elem *d)
{
- const struct hash_netiface4_telem *tdata =
- (const struct hash_netiface4_telem *)data;
- u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
-
- if (data->nomatch)
- flags |= IPSET_FLAG_NOMATCH;
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
- nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(tdata->timeout))))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return 1;
+ next->ip = d->ip;
}
-#define IP_SET_HASH_WITH_NETS
-#define IP_SET_HASH_WITH_RBTREE
-#define IP_SET_HASH_WITH_MULTI
-
+#define MTYPE hash_netiface4
#define PF 4
#define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_netiface4_data_next(struct ip_set_hash *h,
- const struct hash_netiface4_elem *d)
-{
- h->next.ip = d->ip;
-}
+#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
+#include "ip_set_hash_gen.h"
static int
hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- struct ip_set_hash *h = set->data;
+ struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netiface4_elem data = {
+ struct hash_netiface4_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
.elem = 1,
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
int ret;
- if (data.cidr == 0)
+ if (e.cidr == 0)
return -EINVAL;
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK;
+ e.cidr = HOST_MASK;
- ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
- data.ip &= ip_set_netmask(data.cidr);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ e.ip &= ip_set_netmask(e.cidr);
#define IFACE(dir) (par->dir ? par->dir->name : NULL)
#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL)
@@ -320,72 +289,69 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
if (!nf_bridge)
return -EINVAL;
- data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
- data.physdev = 1;
+ e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+ e.physdev = 1;
#else
- data.iface = NULL;
+ e.iface = NULL;
#endif
} else
- data.iface = SRCDIR ? IFACE(in) : IFACE(out);
+ e.iface = SRCDIR ? IFACE(in) : IFACE(out);
- if (!data.iface)
+ if (!e.iface)
return -EINVAL;
- ret = iface_test(&h->rbtree, &data.iface);
+ ret = iface_test(&h->rbtree, &e.iface);
if (adt == IPSET_ADD) {
if (!ret) {
- ret = iface_add(&h->rbtree, &data.iface);
+ ret = iface_add(&h->rbtree, &e.iface);
if (ret)
return ret;
}
} else if (!ret)
return ret;
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- struct ip_set_hash *h = set->data;
+ struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netiface4_elem data = { .cidr = HOST_MASK, .elem = 1 };
+ struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 ip = 0, ip_to, last;
- u32 timeout = h->timeout;
char iface[IFNAMSIZ];
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!tb[IPSET_ATTR_IFACE] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_CIDR]) {
- data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (data.cidr > HOST_MASK)
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (e.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
- data.iface = iface;
- ret = iface_test(&h->rbtree, &data.iface);
+ e.iface = iface;
+ ret = iface_test(&h->rbtree, &e.iface);
if (adt == IPSET_ADD) {
if (!ret) {
- ret = iface_add(&h->rbtree, &data.iface);
+ ret = iface_add(&h->rbtree, &e.iface);
if (ret)
return ret;
}
@@ -395,14 +361,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_PHYSDEV)
- data.physdev = 1;
- if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH))
- flags |= (cadt_flags << 16);
+ e.physdev = 1;
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
- data.ip = htonl(ip & ip_set_hostmask(data.cidr));
- ret = adtfn(set, &data, timeout, flags);
- return ip_set_eexist(ret, flags) ? 0 : ret;
+ e.ip = htonl(ip & ip_set_hostmask(e.cidr));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
if (tb[IPSET_ATTR_IP_TO]) {
@@ -413,16 +380,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
- } else {
- ip_set_mask_from_to(ip, ip_to, data.cidr);
- }
+ } else
+ ip_set_mask_from_to(ip, ip_to, e.cidr);
if (retried)
ip = ntohl(h->next.ip);
while (!after(ip, ip_to)) {
- data.ip = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &data.cidr);
- ret = adtfn(set, &data, timeout, flags);
+ e.ip = htonl(ip);
+ last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -433,18 +399,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static bool
-hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b)
-{
- const struct ip_set_hash *x = a->data;
- const struct ip_set_hash *y = b->data;
-
- /* Resizing changes htable_bits, so we ignore it */
- return x->maxelem == y->maxelem &&
- x->timeout == y->timeout;
-}
-
-/* The type variant functions: IPv6 */
+/* IPv6 variants */
struct hash_netiface6_elem_hashed {
union nf_inet_addr ip;
@@ -454,8 +409,6 @@ struct hash_netiface6_elem_hashed {
u8 elem;
};
-#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
-
struct hash_netiface6_elem {
union nf_inet_addr ip;
u8 physdev;
@@ -465,16 +418,39 @@ struct hash_netiface6_elem {
const char *iface;
};
-struct hash_netiface6_telem {
+struct hash_netiface6t_elem {
+ union nf_inet_addr ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+ const char *iface;
+ unsigned long timeout;
+};
+
+struct hash_netiface6c_elem {
union nf_inet_addr ip;
u8 physdev;
u8 cidr;
u8 nomatch;
u8 elem;
const char *iface;
+ struct ip_set_counter counter;
+};
+
+struct hash_netiface6ct_elem {
+ union nf_inet_addr ip;
+ u8 physdev;
+ u8 cidr;
+ u8 nomatch;
+ u8 elem;
+ const char *iface;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
const struct hash_netiface6_elem *ip2,
@@ -487,53 +463,22 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
ip1->iface == ip2->iface;
}
-static inline bool
-hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem)
-{
- return elem->elem == 0;
-}
-
-static inline void
-hash_netiface6_data_copy(struct hash_netiface6_elem *dst,
- const struct hash_netiface6_elem *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags)
-{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
static inline int
-hash_netiface6_data_match(const struct hash_netiface6_elem *elem)
+hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_netiface6_data_reset_flags(struct hash_netiface6_elem *dst, u32 *flags)
-{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
-static inline void
-hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem)
+hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
{
- elem->elem = 0;
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
{
- ip->ip6[0] &= ip_set_netmask6(prefix)[0];
- ip->ip6[1] &= ip_set_netmask6(prefix)[1];
- ip->ip6[2] &= ip_set_netmask6(prefix)[2];
- ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -563,63 +508,45 @@ nla_put_failure:
return 1;
}
-static bool
-hash_netiface6_data_tlist(struct sk_buff *skb,
- const struct hash_netiface6_elem *data)
+static inline void
+hash_netiface6_data_next(struct hash_netiface4_elem *next,
+ const struct hash_netiface6_elem *d)
{
- const struct hash_netiface6_telem *e =
- (const struct hash_netiface6_telem *)data;
- u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
-
- if (data->nomatch)
- flags |= IPSET_FLAG_NOMATCH;
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
- nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(e->timeout))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
}
+#undef MTYPE
#undef PF
#undef HOST_MASK
+#undef HKEY_DATALEN
+#define MTYPE hash_netiface6
#define PF 6
#define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_netiface6_data_next(struct ip_set_hash *h,
- const struct hash_netiface6_elem *d)
-{
-}
+#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
static int
hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- struct ip_set_hash *h = set->data;
+ struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netiface6_elem data = {
+ struct hash_netiface6_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
.elem = 1,
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
int ret;
- if (data.cidr == 0)
+ if (e.cidr == 0)
return -EINVAL;
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK;
+ e.cidr = HOST_MASK;
- ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
- ip6_netmask(&data.ip, data.cidr);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6_netmask(&e.ip, e.cidr);
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#ifdef CONFIG_BRIDGE_NETFILTER
@@ -627,44 +554,46 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (!nf_bridge)
return -EINVAL;
- data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
- data.physdev = 1;
+ e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+ e.physdev = 1;
#else
- data.iface = NULL;
+ e.iface = NULL;
#endif
} else
- data.iface = SRCDIR ? IFACE(in) : IFACE(out);
+ e.iface = SRCDIR ? IFACE(in) : IFACE(out);
- if (!data.iface)
+ if (!e.iface)
return -EINVAL;
- ret = iface_test(&h->rbtree, &data.iface);
+ ret = iface_test(&h->rbtree, &e.iface);
if (adt == IPSET_ADD) {
if (!ret) {
- ret = iface_add(&h->rbtree, &data.iface);
+ ret = iface_add(&h->rbtree, &e.iface);
if (ret)
return ret;
}
} else if (!ret)
return ret;
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- struct ip_set_hash *h = set->data;
+ struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netiface6_elem data = { .cidr = HOST_MASK, .elem = 1 };
- u32 timeout = h->timeout;
+ struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
char iface[IFNAMSIZ];
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!tb[IPSET_ATTR_IFACE] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -672,28 +601,23 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
if (tb[IPSET_ATTR_CIDR])
- data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (data.cidr > HOST_MASK)
+ e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (e.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- ip6_netmask(&data.ip, data.cidr);
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ ip6_netmask(&e.ip, e.cidr);
strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
- data.iface = iface;
- ret = iface_test(&h->rbtree, &data.iface);
+ e.iface = iface;
+ ret = iface_test(&h->rbtree, &e.iface);
if (adt == IPSET_ADD) {
if (!ret) {
- ret = iface_add(&h->rbtree, &data.iface);
+ ret = iface_add(&h->rbtree, &e.iface);
if (ret)
return ret;
}
@@ -703,90 +627,15 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_PHYSDEV)
- data.physdev = 1;
- if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH))
- flags |= (cadt_flags << 16);
- }
-
- ret = adtfn(set, &data, timeout, flags);
-
- return ip_set_eexist(ret, flags) ? 0 : ret;
-}
-
-/* Create hash:ip type of sets */
-
-static int
-hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
- struct ip_set_hash *h;
- u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- u8 hbits;
- size_t hsize;
-
- if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
- return -IPSET_ERR_INVALID_FAMILY;
-
- if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
- return -IPSET_ERR_PROTOCOL;
-
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
+ e.physdev = 1;
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
- h = kzalloc(sizeof(*h)
- + sizeof(struct ip_set_hash_nets)
- * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
+ ret = adtfn(set, &e, &ext, &ext, flags);
- h->maxelem = maxelem;
- get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
- h->ahash_max = AHASH_MAX_SIZE;
-
- hbits = htable_bits(hashsize);
- hsize = htable_size(hbits);
- if (hsize == 0) {
- kfree(h);
- return -ENOMEM;
- }
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
- kfree(h);
- return -ENOMEM;
- }
- h->table->htable_bits = hbits;
- h->rbtree = RB_ROOT;
-
- set->data = h;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_netiface4_tvariant : &hash_netiface6_tvariant;
-
- if (set->family == NFPROTO_IPV4)
- hash_netiface4_gc_init(set);
- else
- hash_netiface6_gc_init(set);
- } else {
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_netiface4_variant : &hash_netiface6_variant;
- }
-
- pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
-
- return 0;
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
static struct ip_set_type hash_netiface_type __read_mostly = {
@@ -806,6 +655,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -816,6 +666,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 349deb672a2d..9a0869853be5 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -20,14 +20,14 @@
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
#define REVISION_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Range as input support for IPv4 added */
-#define REVISION_MAX 3 /* nomatch flag support added */
+/* 3 nomatch flag support added */
+#define REVISION_MAX 4 /* Counters support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -35,15 +35,9 @@ IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:net,port");
/* Type specific function prefix */
-#define TYPE hash_netport
-
-static bool
-hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);
-
-#define hash_netport4_same_set hash_netport_same_set
-#define hash_netport6_same_set hash_netport_same_set
-
-/* The type variant functions: IPv4 */
+#define HTYPE hash_netport
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
/* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0
* However this way we have to store internally cidr - 1,
@@ -51,7 +45,9 @@ hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);
*/
#define IP_SET_HASH_WITH_NETS_PACKED
-/* Member elements without timeout */
+/* IPv4 variants */
+
+/* Member elements */
struct hash_netport4_elem {
__be32 ip;
__be16 port;
@@ -60,16 +56,36 @@ struct hash_netport4_elem {
u8 nomatch:1;
};
-/* Member elements with timeout support */
-struct hash_netport4_telem {
+struct hash_netport4t_elem {
+ __be32 ip;
+ __be16 port;
+ u8 proto;
+ u8 cidr:7;
+ u8 nomatch:1;
+ unsigned long timeout;
+};
+
+struct hash_netport4c_elem {
+ __be32 ip;
+ __be16 port;
+ u8 proto;
+ u8 cidr:7;
+ u8 nomatch:1;
+ struct ip_set_counter counter;
+};
+
+struct hash_netport4ct_elem {
__be32 ip;
__be16 port;
u8 proto;
u8 cidr:7;
u8 nomatch:1;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
const struct hash_netport4_elem *ip2,
@@ -81,42 +97,22 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline bool
-hash_netport4_data_isnull(const struct hash_netport4_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_netport4_data_copy(struct hash_netport4_elem *dst,
- const struct hash_netport4_elem *src)
+static inline int
+hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
{
- dst->ip = src->ip;
- dst->port = src->port;
- dst->proto = src->proto;
- dst->cidr = src->cidr;
- dst->nomatch = src->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags)
+hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
-hash_netport4_data_reset_flags(struct hash_netport4_elem *dst, u32 *flags)
+hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
-static inline int
-hash_netport4_data_match(const struct hash_netport4_elem *elem)
-{
- return elem->nomatch ? -ENOTEMPTY : 1;
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -126,12 +122,6 @@ hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
elem->cidr = cidr - 1;
}
-static inline void
-hash_netport4_data_zero_out(struct hash_netport4_elem *elem)
-{
- elem->proto = 0;
-}
-
static bool
hash_netport4_data_list(struct sk_buff *skb,
const struct hash_netport4_elem *data)
@@ -151,77 +141,53 @@ nla_put_failure:
return 1;
}
-static bool
-hash_netport4_data_tlist(struct sk_buff *skb,
- const struct hash_netport4_elem *data)
+static inline void
+hash_netport4_data_next(struct hash_netport4_elem *next,
+ const struct hash_netport4_elem *d)
{
- const struct hash_netport4_telem *tdata =
- (const struct hash_netport4_telem *)data;
- u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
- if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(tdata->timeout))) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->ip = d->ip;
+ next->port = d->port;
}
-#define IP_SET_HASH_WITH_PROTO
-#define IP_SET_HASH_WITH_NETS
-
+#define MTYPE hash_netport4
#define PF 4
#define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_netport4_data_next(struct ip_set_hash *h,
- const struct hash_netport4_elem *d)
-{
- h->next.ip = d->ip;
- h->next.port = d->port;
-}
+#include "ip_set_hash_gen.h"
static int
hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netport4_elem data = {
+ struct hash_netport4_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK - 1;
+ e.cidr = HOST_MASK - 1;
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
- data.ip &= ip_set_netmask(data.cidr + 1);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+ e.ip &= ip_set_netmask(e.cidr + 1);
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netport4_elem data = { .cidr = HOST_MASK - 1 };
+ struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 port, port_to, p = 0, ip = 0, ip_to, last;
- u32 timeout = h->timeout;
bool with_ports = false;
u8 cidr;
int ret;
@@ -230,13 +196,16 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -244,47 +213,42 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- data.cidr = cidr - 1;
+ e.cidr = cidr - 1;
}
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMP))
- data.port = 0;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
- if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_NOMATCH)
- flags |= (cadt_flags << 16);
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) {
- data.ip = htonl(ip & ip_set_hostmask(data.cidr + 1));
- ret = adtfn(set, &data, timeout, flags);
- return ip_set_eexist(ret, flags) ? 0 : ret;
+ e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
- port = port_to = ntohs(data.port);
+ port = port_to = ntohs(e.port);
if (tb[IPSET_ATTR_PORT_TO]) {
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port_to < port)
@@ -298,21 +262,20 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
- } else {
- ip_set_mask_from_to(ip, ip_to, data.cidr + 1);
- }
+ } else
+ ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
if (retried)
ip = ntohl(h->next.ip);
while (!after(ip, ip_to)) {
- data.ip = htonl(ip);
+ e.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &cidr);
- data.cidr = cidr - 1;
+ e.cidr = cidr - 1;
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
- data.port = htons(p);
- ret = adtfn(set, &data, timeout, flags);
+ e.port = htons(p);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -324,36 +287,46 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-static bool
-hash_netport_same_set(const struct ip_set *a, const struct ip_set *b)
-{
- const struct ip_set_hash *x = a->data;
- const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
- /* Resizing changes htable_bits, so we ignore it */
- return x->maxelem == y->maxelem &&
- x->timeout == y->timeout;
-}
+struct hash_netport6_elem {
+ union nf_inet_addr ip;
+ __be16 port;
+ u8 proto;
+ u8 cidr:7;
+ u8 nomatch:1;
+};
-/* The type variant functions: IPv6 */
+struct hash_netport6t_elem {
+ union nf_inet_addr ip;
+ __be16 port;
+ u8 proto;
+ u8 cidr:7;
+ u8 nomatch:1;
+ unsigned long timeout;
+};
-struct hash_netport6_elem {
+struct hash_netport6c_elem {
union nf_inet_addr ip;
__be16 port;
u8 proto;
u8 cidr:7;
u8 nomatch:1;
+ struct ip_set_counter counter;
};
-struct hash_netport6_telem {
+struct hash_netport6ct_elem {
union nf_inet_addr ip;
__be16 port;
u8 proto;
u8 cidr:7;
u8 nomatch:1;
+ struct ip_set_counter counter;
unsigned long timeout;
};
+/* Common functions */
+
static inline bool
hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
const struct hash_netport6_elem *ip2,
@@ -365,53 +338,22 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline bool
-hash_netport6_data_isnull(const struct hash_netport6_elem *elem)
-{
- return elem->proto == 0;
-}
-
-static inline void
-hash_netport6_data_copy(struct hash_netport6_elem *dst,
- const struct hash_netport6_elem *src)
-{
- memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags)
-{
- dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
-static inline void
-hash_netport6_data_reset_flags(struct hash_netport6_elem *dst, u32 *flags)
-{
- if (dst->nomatch) {
- *flags = IPSET_FLAG_NOMATCH;
- dst->nomatch = 0;
- }
-}
-
static inline int
-hash_netport6_data_match(const struct hash_netport6_elem *elem)
+hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
-hash_netport6_data_zero_out(struct hash_netport6_elem *elem)
+hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
{
- elem->proto = 0;
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
{
- ip->ip6[0] &= ip_set_netmask6(prefix)[0];
- ip->ip6[1] &= ip_set_netmask6(prefix)[1];
- ip->ip6[2] &= ip_set_netmask6(prefix)[2];
- ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+ swap(*flags, elem->nomatch);
}
static inline void
@@ -440,76 +382,57 @@ nla_put_failure:
return 1;
}
-static bool
-hash_netport6_data_tlist(struct sk_buff *skb,
- const struct hash_netport6_elem *data)
+static inline void
+hash_netport6_data_next(struct hash_netport4_elem *next,
+ const struct hash_netport6_elem *d)
{
- const struct hash_netport6_telem *e =
- (const struct hash_netport6_telem *)data;
- u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
- if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
- nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
- nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) ||
- nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(e->timeout))) ||
- (flags &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return 1;
+ next->port = d->port;
}
+#undef MTYPE
#undef PF
#undef HOST_MASK
+#define MTYPE hash_netport6
#define PF 6
#define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_netport6_data_next(struct ip_set_hash *h,
- const struct hash_netport6_elem *d)
-{
- h->next.port = d->port;
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
static int
hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netport6_elem data = {
+ struct hash_netport6_elem e = {
.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1,
};
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
if (adt == IPSET_TEST)
- data.cidr = HOST_MASK - 1;
+ e.cidr = HOST_MASK - 1;
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
- &data.port, &data.proto))
+ &e.port, &e.proto))
return -EINVAL;
- ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
- ip6_netmask(&data.ip, data.cidr + 1);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+ ip6_netmask(&e.ip, e.cidr + 1);
- return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct ip_set_hash *h = set->data;
+ const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netport6_elem data = { .cidr = HOST_MASK - 1 };
+ struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
u32 port, port_to;
- u32 timeout = h->timeout;
bool with_ports = false;
u8 cidr;
int ret;
@@ -518,7 +441,9 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -526,7 +451,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+ ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -534,45 +460,40 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- data.cidr = cidr - 1;
+ e.cidr = cidr - 1;
}
- ip6_netmask(&data.ip, data.cidr + 1);
+ ip6_netmask(&e.ip, e.cidr + 1);
if (tb[IPSET_ATTR_PORT])
- data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
else
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_PROTO]) {
- data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
- with_ports = ip_set_proto_with_ports(data.proto);
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
- if (data.proto == 0)
+ if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
} else
return -IPSET_ERR_MISSING_PROTO;
- if (!(with_ports || data.proto == IPPROTO_ICMPV6))
- data.port = 0;
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout(h->timeout))
- return -IPSET_ERR_TIMEOUT;
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- }
-
- if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_NOMATCH)
- flags |= (cadt_flags << 16);
+ flags |= (IPSET_FLAG_NOMATCH << 16);
}
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
- ret = adtfn(set, &data, timeout, flags);
- return ip_set_eexist(ret, flags) ? 0 : ret;
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt) ? 1 :
+ ip_set_eexist(ret, flags) ? 0 : ret;
}
- port = ntohs(data.port);
+ port = ntohs(e.port);
port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
if (port > port_to)
swap(port, port_to);
@@ -580,8 +501,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
port = ntohs(h->next.port);
for (; port <= port_to; port++) {
- data.port = htons(port);
- ret = adtfn(set, &data, timeout, flags);
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
@@ -591,80 +512,6 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* Create hash:ip type of sets */
-
-static int
-hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
- struct ip_set_hash *h;
- u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- u8 hbits;
- size_t hsize;
-
- if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
- return -IPSET_ERR_INVALID_FAMILY;
-
- if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
- return -IPSET_ERR_PROTOCOL;
-
- if (tb[IPSET_ATTR_HASHSIZE]) {
- hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
- if (hashsize < IPSET_MIMINAL_HASHSIZE)
- hashsize = IPSET_MIMINAL_HASHSIZE;
- }
-
- if (tb[IPSET_ATTR_MAXELEM])
- maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
- h = kzalloc(sizeof(*h)
- + sizeof(struct ip_set_hash_nets)
- * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
- if (!h)
- return -ENOMEM;
-
- h->maxelem = maxelem;
- get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
-
- hbits = htable_bits(hashsize);
- hsize = htable_size(hbits);
- if (hsize == 0) {
- kfree(h);
- return -ENOMEM;
- }
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
- kfree(h);
- return -ENOMEM;
- }
- h->table->htable_bits = hbits;
-
- set->data = h;
-
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_netport4_tvariant : &hash_netport6_tvariant;
-
- if (set->family == NFPROTO_IPV4)
- hash_netport4_gc_init(set);
- else
- hash_netport6_gc_init(set);
- } else {
- set->variant = set->family == NFPROTO_IPV4
- ? &hash_netport4_variant : &hash_netport6_variant;
- }
-
- pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
-
- return 0;
-}
-
static struct ip_set_type hash_netport_type __read_mostly = {
.name = "hash:net,port",
.protocol = IPSET_PROTOCOL,
@@ -681,6 +528,7 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -692,6 +540,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 09c744aa8982..979b8c90e422 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -13,30 +13,53 @@
#include <linux/errno.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_list.h>
#define REVISION_MIN 0
-#define REVISION_MAX 0
+#define REVISION_MAX 1 /* Counters support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_list:set");
-/* Member elements without and with timeout */
+/* Member elements */
struct set_elem {
ip_set_id_t id;
};
-struct set_telem {
- ip_set_id_t id;
+struct sett_elem {
+ struct {
+ ip_set_id_t id;
+ } __attribute__ ((aligned));
+ unsigned long timeout;
+};
+
+struct setc_elem {
+ struct {
+ ip_set_id_t id;
+ } __attribute__ ((aligned));
+ struct ip_set_counter counter;
+};
+
+struct setct_elem {
+ struct {
+ ip_set_id_t id;
+ } __attribute__ ((aligned));
+ struct ip_set_counter counter;
unsigned long timeout;
};
+struct set_adt_elem {
+ ip_set_id_t id;
+ ip_set_id_t refid;
+ int before;
+};
+
/* Type structure */
struct list_set {
size_t dsize; /* element size */
+ size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
u32 size; /* size of set list array */
u32 timeout; /* timeout value */
struct timer_list gc; /* garbage collection */
@@ -49,179 +72,311 @@ list_set_elem(const struct list_set *map, u32 id)
return (struct set_elem *)((void *)map->members + id * map->dsize);
}
-static inline struct set_telem *
-list_set_telem(const struct list_set *map, u32 id)
-{
- return (struct set_telem *)((void *)map->members + id * map->dsize);
-}
+#define ext_timeout(e, m) \
+(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
+#define ext_counter(e, m) \
+(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER])
-static inline bool
-list_set_timeout(const struct list_set *map, u32 id)
+static int
+list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
{
- const struct set_telem *elem = list_set_telem(map, id);
+ struct list_set *map = set->data;
+ struct set_elem *e;
+ u32 i, cmdflags = opt->cmdflags;
+ int ret;
- return ip_set_timeout_test(elem->timeout);
+ /* Don't lookup sub-counters at all */
+ opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
+ if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
+ opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
+ for (i = 0; i < map->size; i++) {
+ e = list_set_elem(map, i);
+ if (e->id == IPSET_INVALID_ID)
+ return 0;
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
+ continue;
+ ret = ip_set_test(e->id, skb, par, opt);
+ if (ret > 0) {
+ if (SET_WITH_COUNTER(set))
+ ip_set_update_counter(ext_counter(e, map),
+ ext, &opt->ext,
+ cmdflags);
+ return ret;
+ }
+ }
+ return 0;
}
-static inline bool
-list_set_expired(const struct list_set *map, u32 id)
+static int
+list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
{
- const struct set_telem *elem = list_set_telem(map, id);
+ struct list_set *map = set->data;
+ struct set_elem *e;
+ u32 i;
+ int ret;
- return ip_set_timeout_expired(elem->timeout);
+ for (i = 0; i < map->size; i++) {
+ e = list_set_elem(map, i);
+ if (e->id == IPSET_INVALID_ID)
+ return 0;
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
+ continue;
+ ret = ip_set_add(e->id, skb, par, opt);
+ if (ret == 0)
+ return ret;
+ }
+ return 0;
}
-/* Set list without and with timeout */
-
static int
-list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
+list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
- enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+ struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
{
struct list_set *map = set->data;
- struct set_elem *elem;
+ struct set_elem *e;
u32 i;
int ret;
for (i = 0; i < map->size; i++) {
- elem = list_set_elem(map, i);
- if (elem->id == IPSET_INVALID_ID)
+ e = list_set_elem(map, i);
+ if (e->id == IPSET_INVALID_ID)
return 0;
- if (with_timeout(map->timeout) && list_set_expired(map, i))
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
continue;
- switch (adt) {
- case IPSET_TEST:
- ret = ip_set_test(elem->id, skb, par, opt);
- if (ret > 0)
- return ret;
- break;
- case IPSET_ADD:
- ret = ip_set_add(elem->id, skb, par, opt);
- if (ret == 0)
- return ret;
- break;
- case IPSET_DEL:
- ret = ip_set_del(elem->id, skb, par, opt);
- if (ret == 0)
- return ret;
- break;
- default:
- break;
- }
+ ret = ip_set_del(e->id, skb, par, opt);
+ if (ret == 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ struct list_set *map = set->data;
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+
+ switch (adt) {
+ case IPSET_TEST:
+ return list_set_ktest(set, skb, par, opt, &ext);
+ case IPSET_ADD:
+ return list_set_kadd(set, skb, par, opt, &ext);
+ case IPSET_DEL:
+ return list_set_kdel(set, skb, par, opt, &ext);
+ default:
+ break;
}
return -EINVAL;
}
static bool
-id_eq(const struct list_set *map, u32 i, ip_set_id_t id)
+id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
{
- const struct set_elem *elem;
+ const struct list_set *map = set->data;
+ const struct set_elem *e;
- if (i < map->size) {
- elem = list_set_elem(map, i);
- return elem->id == id;
+ if (i >= map->size)
+ return 0;
+
+ e = list_set_elem(map, i);
+ return !!(e->id == id &&
+ !(SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map))));
+}
+
+/* Store member d->id in slot i of the list, shifting later members up.
+ *
+ * If slot i is occupied, room is made first: either the occupant of the
+ * final slot is overwritten (i is the final slot) or the tail is moved up
+ * by one and the member in the final slot falls off. In both cases the
+ * displaced member is released with ip_set_put_byindex(). The timeout and
+ * counter extensions of the new member are then initialised from ext when
+ * the set carries them. Always returns 0.
+ */
+static int
+list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
+ const struct ip_set_ext *ext)
+{
+ struct list_set *map = set->data;
+ const u32 last = map->size - 1;
+ struct set_elem *e = list_set_elem(map, i);
+
+ if (e->id != IPSET_INVALID_ID) {
+ if (i != last) {
+ struct set_elem *tail = list_set_elem(map, last);
+
+ /* The member in the final slot is pushed off */
+ if (tail->id != IPSET_INVALID_ID)
+ ip_set_put_byindex(tail->id);
+ memmove(list_set_elem(map, i + 1), e,
+ map->dsize * (last - i));
+ } else {
+ /* Final slot is overwritten: e.g. add new,before,last */
+ ip_set_put_byindex(e->id);
+ }
}
+ e->id = d->id;
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(e, map), ext);
return 0;
}
-static bool
-id_eq_timeout(const struct list_set *map, u32 i, ip_set_id_t id)
+static int
+list_set_del(struct ip_set *set, u32 i)
{
- const struct set_elem *elem;
+ struct list_set *map = set->data;
+ struct set_elem *e = list_set_elem(map, i);
- if (i < map->size) {
- elem = list_set_elem(map, i);
- return !!(elem->id == id &&
- !(with_timeout(map->timeout) &&
- list_set_expired(map, i)));
- }
+ ip_set_put_byindex(e->id);
+ if (i < map->size - 1)
+ memmove(e, list_set_elem(map, i + 1),
+ map->dsize * (map->size - (i + 1)));
+
+ /* Last element */
+ e = list_set_elem(map, map->size - 1);
+ e->id = IPSET_INVALID_ID;
return 0;
}
static void
-list_elem_add(struct list_set *map, u32 i, ip_set_id_t id)
+set_cleanup_entries(struct ip_set *set)
{
+ struct list_set *map = set->data;
struct set_elem *e;
+ u32 i;
- for (; i < map->size; i++) {
+ for (i = 0; i < map->size; i++) {
e = list_set_elem(map, i);
- swap(e->id, id);
- if (e->id == IPSET_INVALID_ID)
- break;
+ if (e->id != IPSET_INVALID_ID &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
+ list_set_del(set, i);
}
}
-static void
-list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id,
- unsigned long timeout)
+static int
+list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
{
- struct set_telem *e;
+ struct list_set *map = set->data;
+ struct set_adt_elem *d = value;
+ struct set_elem *e;
+ u32 i;
+ int ret;
- for (; i < map->size; i++) {
- e = list_set_telem(map, i);
- swap(e->id, id);
- swap(e->timeout, timeout);
+ for (i = 0; i < map->size; i++) {
+ e = list_set_elem(map, i);
if (e->id == IPSET_INVALID_ID)
- break;
+ return 0;
+ else if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
+ continue;
+ else if (e->id != d->id)
+ continue;
+
+ if (d->before == 0)
+ return 1;
+ else if (d->before > 0)
+ ret = id_eq(set, i + 1, d->refid);
+ else
+ ret = i > 0 && id_eq(set, i - 1, d->refid);
+ return ret;
}
+ return 0;
}
+
static int
-list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
- unsigned long timeout)
+list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
{
- const struct set_elem *e = list_set_elem(map, i);
+ struct list_set *map = set->data;
+ struct set_adt_elem *d = value;
+ struct set_elem *e;
+ bool flag_exist = flags & IPSET_FLAG_EXIST;
+ u32 i, ret = 0;
- if (e->id != IPSET_INVALID_ID) {
- const struct set_elem *x = list_set_elem(map, map->size - 1);
+ /* Check already added element */
+ for (i = 0; i < map->size; i++) {
+ e = list_set_elem(map, i);
+ if (e->id == IPSET_INVALID_ID)
+ goto insert;
+ else if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
+ continue;
+ else if (e->id != d->id)
+ continue;
- /* Last element replaced or pushed off */
- if (x->id != IPSET_INVALID_ID)
- ip_set_put_byindex(x->id);
+ if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) ||
+ (d->before < 0 &&
+ (i == 0 || !id_eq(set, i - 1, d->refid))))
+ /* Before/after doesn't match */
+ return -IPSET_ERR_REF_EXIST;
+ if (!flag_exist)
+ /* Can't re-add */
+ return -IPSET_ERR_EXIST;
+ /* Update extensions */
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(e, map), ext);
+ /* Set is already added to the list */
+ ip_set_put_byindex(d->id);
+ return 0;
+ }
+insert:
+ ret = -IPSET_ERR_LIST_FULL;
+ for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
+ e = list_set_elem(map, i);
+ if (e->id == IPSET_INVALID_ID)
+ ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
+ : list_set_add(set, i, d, ext);
+ else if (e->id != d->refid)
+ continue;
+ else if (d->before > 0)
+ ret = list_set_add(set, i, d, ext);
+ else if (i + 1 < map->size)
+ ret = list_set_add(set, i + 1, d, ext);
}
- if (with_timeout(map->timeout))
- list_elem_tadd(map, i, id, ip_set_timeout_set(timeout));
- else
- list_elem_add(map, i, id);
- return 0;
+ return ret;
}
static int
-list_set_del(struct list_set *map, u32 i)
+list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
{
- struct set_elem *a = list_set_elem(map, i), *b;
-
- ip_set_put_byindex(a->id);
-
- for (; i < map->size - 1; i++) {
- b = list_set_elem(map, i + 1);
- a->id = b->id;
- if (with_timeout(map->timeout))
- ((struct set_telem *)a)->timeout =
- ((struct set_telem *)b)->timeout;
- a = b;
- if (a->id == IPSET_INVALID_ID)
- break;
- }
- /* Last element */
- a->id = IPSET_INVALID_ID;
- return 0;
-}
-
-static void
-cleanup_entries(struct list_set *map)
-{
- struct set_telem *e;
+ struct list_set *map = set->data;
+ struct set_adt_elem *d = value;
+ struct set_elem *e;
u32 i;
for (i = 0; i < map->size; i++) {
- e = list_set_telem(map, i);
- if (e->id != IPSET_INVALID_ID && list_set_expired(map, i))
- list_set_del(map, i);
+ e = list_set_elem(map, i);
+ if (e->id == IPSET_INVALID_ID)
+ return d->before != 0 ? -IPSET_ERR_REF_EXIST
+ : -IPSET_ERR_EXIST;
+ else if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
+ continue;
+ else if (e->id != d->id)
+ continue;
+
+ if (d->before == 0)
+ return list_set_del(set, i);
+ else if (d->before > 0) {
+ if (!id_eq(set, i + 1, d->refid))
+ return -IPSET_ERR_REF_EXIST;
+ return list_set_del(set, i);
+ } else if (i == 0 || !id_eq(set, i - 1, d->refid))
+ return -IPSET_ERR_REF_EXIST;
+ else
+ return list_set_del(set, i);
}
+ return -IPSET_ERR_EXIST;
}
static int
@@ -229,26 +384,27 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
struct list_set *map = set->data;
- bool with_timeout = with_timeout(map->timeout);
- bool flag_exist = flags & IPSET_FLAG_EXIST;
- int before = 0;
- u32 timeout = map->timeout;
- ip_set_id_t id, refid = IPSET_INVALID_ID;
- const struct set_elem *elem;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
struct ip_set *s;
- u32 i;
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_NAME] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
- if (id == IPSET_INVALID_ID)
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
+ if (e.id == IPSET_INVALID_ID)
return -IPSET_ERR_NAME;
/* "Loop detection" */
if (s->type->features & IPSET_TYPE_NAME) {
@@ -258,115 +414,34 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
- before = f & IPSET_FLAG_BEFORE;
+ e.before = f & IPSET_FLAG_BEFORE;
}
- if (before && !tb[IPSET_ATTR_NAMEREF]) {
+ if (e.before && !tb[IPSET_ATTR_NAMEREF]) {
ret = -IPSET_ERR_BEFORE;
goto finish;
}
if (tb[IPSET_ATTR_NAMEREF]) {
- refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
- &s);
- if (refid == IPSET_INVALID_ID) {
+ e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
+ &s);
+ if (e.refid == IPSET_INVALID_ID) {
ret = -IPSET_ERR_NAMEREF;
goto finish;
}
- if (!before)
- before = -1;
- }
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!with_timeout) {
- ret = -IPSET_ERR_TIMEOUT;
- goto finish;
- }
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ if (!e.before)
+ e.before = -1;
}
- if (with_timeout && adt != IPSET_TEST)
- cleanup_entries(map);
+ if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set))
+ set_cleanup_entries(set);
- switch (adt) {
- case IPSET_TEST:
- for (i = 0; i < map->size && !ret; i++) {
- elem = list_set_elem(map, i);
- if (elem->id == IPSET_INVALID_ID ||
- (before != 0 && i + 1 >= map->size))
- break;
- else if (with_timeout && list_set_expired(map, i))
- continue;
- else if (before > 0 && elem->id == id)
- ret = id_eq_timeout(map, i + 1, refid);
- else if (before < 0 && elem->id == refid)
- ret = id_eq_timeout(map, i + 1, id);
- else if (before == 0 && elem->id == id)
- ret = 1;
- }
- break;
- case IPSET_ADD:
- for (i = 0; i < map->size; i++) {
- elem = list_set_elem(map, i);
- if (elem->id != id)
- continue;
- if (!(with_timeout && flag_exist)) {
- ret = -IPSET_ERR_EXIST;
- goto finish;
- } else {
- struct set_telem *e = list_set_telem(map, i);
-
- if ((before > 1 &&
- !id_eq(map, i + 1, refid)) ||
- (before < 0 &&
- (i == 0 || !id_eq(map, i - 1, refid)))) {
- ret = -IPSET_ERR_EXIST;
- goto finish;
- }
- e->timeout = ip_set_timeout_set(timeout);
- ip_set_put_byindex(id);
- ret = 0;
- goto finish;
- }
- }
- ret = -IPSET_ERR_LIST_FULL;
- for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
- elem = list_set_elem(map, i);
- if (elem->id == IPSET_INVALID_ID)
- ret = before != 0 ? -IPSET_ERR_REF_EXIST
- : list_set_add(map, i, id, timeout);
- else if (elem->id != refid)
- continue;
- else if (before > 0)
- ret = list_set_add(map, i, id, timeout);
- else if (i + 1 < map->size)
- ret = list_set_add(map, i + 1, id, timeout);
- }
- break;
- case IPSET_DEL:
- ret = -IPSET_ERR_EXIST;
- for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) {
- elem = list_set_elem(map, i);
- if (elem->id == IPSET_INVALID_ID) {
- ret = before != 0 ? -IPSET_ERR_REF_EXIST
- : -IPSET_ERR_EXIST;
- break;
- } else if (elem->id == id &&
- (before == 0 ||
- (before > 0 && id_eq(map, i + 1, refid))))
- ret = list_set_del(map, i);
- else if (elem->id == refid &&
- before < 0 && id_eq(map, i + 1, id))
- ret = list_set_del(map, i + 1);
- }
- break;
- default:
- break;
- }
+ ret = adtfn(set, &e, &ext, &ext, flags);
finish:
- if (refid != IPSET_INVALID_ID)
- ip_set_put_byindex(refid);
+ if (e.refid != IPSET_INVALID_ID)
+ ip_set_put_byindex(e.refid);
if (adt != IPSET_ADD || ret)
- ip_set_put_byindex(id);
+ ip_set_put_byindex(e.id);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -375,14 +450,14 @@ static void
list_set_flush(struct ip_set *set)
{
struct list_set *map = set->data;
- struct set_elem *elem;
+ struct set_elem *e;
u32 i;
for (i = 0; i < map->size; i++) {
- elem = list_set_elem(map, i);
- if (elem->id != IPSET_INVALID_ID) {
- ip_set_put_byindex(elem->id);
- elem->id = IPSET_INVALID_ID;
+ e = list_set_elem(map, i);
+ if (e->id != IPSET_INVALID_ID) {
+ ip_set_put_byindex(e->id);
+ e->id = IPSET_INVALID_ID;
}
}
}
@@ -392,7 +467,7 @@ list_set_destroy(struct ip_set *set)
{
struct list_set *map = set->data;
- if (with_timeout(map->timeout))
+ if (SET_WITH_TIMEOUT(set))
del_timer_sync(&map->gc);
list_set_flush(set);
kfree(map);
@@ -410,8 +485,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
if (!nested)
goto nla_put_failure;
if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
- (with_timeout(map->timeout) &&
+ (SET_WITH_TIMEOUT(set) &&
nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
+ (SET_WITH_COUNTER(set) &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
+ htonl(IPSET_FLAG_WITH_COUNTERS))) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
htonl(sizeof(*map) + map->size * map->dsize)))
@@ -440,7 +518,8 @@ list_set_list(const struct ip_set *set,
e = list_set_elem(map, i);
if (e->id == IPSET_INVALID_ID)
goto finish;
- if (with_timeout(map->timeout) && list_set_expired(map, i))
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, map)))
continue;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested) {
@@ -453,13 +532,14 @@ list_set_list(const struct ip_set *set,
if (nla_put_string(skb, IPSET_ATTR_NAME,
ip_set_name_byindex(e->id)))
goto nla_put_failure;
- if (with_timeout(map->timeout)) {
- const struct set_telem *te =
- (const struct set_telem *) e;
- __be32 to = htonl(ip_set_timeout_get(te->timeout));
- if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, to))
- goto nla_put_failure;
- }
+ if (SET_WITH_TIMEOUT(set) &&
+ nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(ip_set_timeout_get(
+ ext_timeout(e, map)))))
+ goto nla_put_failure;
+ if (SET_WITH_COUNTER(set) &&
+ ip_set_put_counter(skb, ext_counter(e, map)))
+ goto nla_put_failure;
ipset_nest_end(skb, nested);
}
finish:
@@ -485,12 +565,18 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
const struct list_set *y = b->data;
return x->size == y->size &&
- x->timeout == y->timeout;
+ x->timeout == y->timeout &&
+ a->extensions == b->extensions;
}
-static const struct ip_set_type_variant list_set = {
+static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
+ .adt = {
+ [IPSET_ADD] = list_set_uadd,
+ [IPSET_DEL] = list_set_udel,
+ [IPSET_TEST] = list_set_utest,
+ },
.destroy = list_set_destroy,
.flush = list_set_flush,
.head = list_set_head,
@@ -505,7 +591,7 @@ list_set_gc(unsigned long ul_set)
struct list_set *map = set->data;
write_lock_bh(&set->lock);
- cleanup_entries(map);
+ set_cleanup_entries(set);
write_unlock_bh(&set->lock);
map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
@@ -513,20 +599,20 @@ list_set_gc(unsigned long ul_set)
}
static void
-list_set_gc_init(struct ip_set *set)
+list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
{
struct list_set *map = set->data;
init_timer(&map->gc);
map->gc.data = (unsigned long) set;
- map->gc.function = list_set_gc;
+ map->gc.function = gc;
map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
add_timer(&map->gc);
}
/* Create list:set type of sets */
-static bool
+static struct list_set *
init_list_set(struct ip_set *set, u32 size, size_t dsize,
unsigned long timeout)
{
@@ -536,7 +622,7 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
if (!map)
- return false;
+ return NULL;
map->size = size;
map->dsize = dsize;
@@ -548,16 +634,19 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
e->id = IPSET_INVALID_ID;
}
- return true;
+ return map;
}
static int
list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
{
- u32 size = IP_SET_LIST_DEFAULT_SIZE;
+ struct list_set *map;
+ u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0;
+ unsigned long timeout = 0;
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_SIZE])
@@ -565,18 +654,46 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
if (size < IP_SET_LIST_MIN_SIZE)
size = IP_SET_LIST_MIN_SIZE;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!init_list_set(set, size, sizeof(struct set_telem),
- ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT])))
+ if (tb[IPSET_ATTR_CADT_FLAGS])
+ cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (tb[IPSET_ATTR_TIMEOUT])
+ timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+ set->variant = &set_variant;
+ if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
+ set->extensions |= IPSET_EXT_COUNTER;
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ map = init_list_set(set, size,
+ sizeof(struct setct_elem), timeout);
+ if (!map)
+ return -ENOMEM;
+ set->extensions |= IPSET_EXT_TIMEOUT;
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct setct_elem, timeout);
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct setct_elem, counter);
+ list_set_gc_init(set, list_set_gc);
+ } else {
+ map = init_list_set(set, size,
+ sizeof(struct setc_elem), 0);
+ if (!map)
+ return -ENOMEM;
+ map->offset[IPSET_OFFSET_COUNTER] =
+ offsetof(struct setc_elem, counter);
+ }
+ } else if (tb[IPSET_ATTR_TIMEOUT]) {
+ map = init_list_set(set, size,
+ sizeof(struct sett_elem), timeout);
+ if (!map)
return -ENOMEM;
-
- list_set_gc_init(set);
+ set->extensions |= IPSET_EXT_TIMEOUT;
+ map->offset[IPSET_OFFSET_TIMEOUT] =
+ offsetof(struct sett_elem, timeout);
+ list_set_gc_init(set, list_set_gc);
} else {
- if (!init_list_set(set, size, sizeof(struct set_elem),
- IPSET_NO_TIMEOUT))
+ map = init_list_set(set, size, sizeof(struct set_elem), 0);
+ if (!map)
return -ENOMEM;
}
- set->variant = &list_set;
return 0;
}
@@ -592,6 +709,7 @@ static struct ip_set_type list_set_type __read_mostly = {
.create_policy = {
[IPSET_ATTR_SIZE] = { .type = NLA_U32 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
},
.adt_policy = {
[IPSET_ATTR_NAME] = { .type = NLA_STRING,
@@ -601,6 +719,8 @@ static struct ip_set_type list_set_type __read_mostly = {
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0b779d7df881..dfd7b65b3d2a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
module_put(app->module);
}
+/* Free an app incarnation: its timeout table first, then the object. */
+static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
+{
+ kfree(inc->timeout_table);
+ kfree(inc);
+}
+
+/* RCU callback: map the embedded rcu_head back to its incarnation and
+ * free it.
+ */
+static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
+{
+ ip_vs_app_inc_destroy(container_of(head, struct ip_vs_app, rcu_head));
+}
/*
* Allocate/initialize app incarnation and register it in proto apps.
@@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
return 0;
out:
- kfree(inc->timeout_table);
- kfree(inc);
+ ip_vs_app_inc_destroy(inc);
return ret;
}
@@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
list_del(&inc->a_list);
- kfree(inc->timeout_table);
- kfree(inc);
+ call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
}
@@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
{
int result;
- atomic_inc(&inc->usecnt);
- if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
- atomic_dec(&inc->usecnt);
+ result = ip_vs_app_get(inc->app);
+ if (result)
+ atomic_inc(&inc->usecnt);
return result;
}
@@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
*/
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
- ip_vs_app_put(inc->app);
atomic_dec(&inc->usecnt);
+ ip_vs_app_put(inc->app);
}
@@ -218,6 +228,7 @@ out_unlock:
/*
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
+ * Caller should use synchronize_rcu() or rcu_barrier()
*/
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
@@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
unsigned int flag, __u32 seq, int diff)
{
/* spinlock is to keep updating cp->flags atomic */
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
vseq->previous_delta = vseq->delta;
vseq->delta += diff;
vseq->init_seq = seq;
cp->flags |= flag;
}
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 704e514e02ab..a083bda322b6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
struct ip_vs_aligned_lock
{
- rwlock_t l;
+ spinlock_t l;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
/* lock array for conn table */
static struct ip_vs_aligned_lock
__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
-static inline void ct_read_lock(unsigned int key)
-{
- read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock(unsigned int key)
-{
- read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_lock(unsigned int key)
-{
- write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_unlock(unsigned int key)
-{
- write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_lock_bh(unsigned int key)
-{
- read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock_bh(unsigned int key)
-{
- read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
static inline void ct_write_lock_bh(unsigned int key)
{
- write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+ spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
static inline void ct_write_unlock_bh(unsigned int key)
{
- write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+ spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
@@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
/* Hash by protocol, client address and port */
hash = ip_vs_conn_hashkey_conn(cp);
- ct_write_lock(hash);
+ ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
- hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
cp->flags |= IP_VS_CONN_F_HASHED;
atomic_inc(&cp->refcnt);
+ hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
pr_err("%s(): request for already hashed, called from %pF\n",
@@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
}
spin_unlock(&cp->lock);
- ct_write_unlock(hash);
+ ct_write_unlock_bh(hash);
return ret;
}
@@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
/*
* UNhashes ip_vs_conn from ip_vs_conn_tab.
- * returns bool success.
+ * returns bool success. Caller should hold conn reference.
*/
static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
{
@@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
/* unhash it and decrease its reference counter */
hash = ip_vs_conn_hashkey_conn(cp);
- ct_write_lock(hash);
+ ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
- hlist_del(&cp->c_list);
+ hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
atomic_dec(&cp->refcnt);
ret = 1;
@@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
ret = 0;
spin_unlock(&cp->lock);
- ct_write_unlock(hash);
+ ct_write_unlock_bh(hash);
+
+ return ret;
+}
+
+/* Try to unlink ip_vs_conn from ip_vs_conn_tab.
+ * returns bool success.
+ */
+static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
+{
+ unsigned int hash;
+ bool ret;
+
+ hash = ip_vs_conn_hashkey_conn(cp);
+
+ ct_write_lock_bh(hash);
+ spin_lock(&cp->lock);
+
+ if (cp->flags & IP_VS_CONN_F_HASHED) {
+ ret = false;
+ /* Decrease refcnt and unlink conn only if we are last user */
+ if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+ hlist_del_rcu(&cp->c_list);
+ cp->flags &= ~IP_VS_CONN_F_HASHED;
+ ret = true;
+ }
+ } else
+ ret = atomic_read(&cp->refcnt) ? false : true;
+
+ spin_unlock(&cp->lock);
+ ct_write_unlock_bh(hash);
return ret;
}
@@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
hash = ip_vs_conn_hashkey_param(p, false);
- ct_read_lock(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (cp->af == p->af &&
- p->cport == cp->cport && p->vport == cp->vport &&
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (p->cport == cp->cport && p->vport == cp->vport &&
+ cp->af == p->af &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
p->protocol == cp->protocol &&
ip_vs_conn_net_eq(cp, p->net)) {
+ if (!__ip_vs_conn_get(cp))
+ continue;
/* HIT */
- atomic_inc(&cp->refcnt);
- ct_read_unlock(hash);
+ rcu_read_unlock();
return cp;
}
}
- ct_read_unlock(hash);
+ rcu_read_unlock();
return NULL;
}
@@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
hash = ip_vs_conn_hashkey_param(p, false);
- ct_read_lock(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (!ip_vs_conn_net_eq(cp, p->net))
- continue;
- if (p->pe_data && p->pe->ct_match) {
- if (p->pe == cp->pe && p->pe->ct_match(p, cp))
- goto out;
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (unlikely(p->pe_data && p->pe->ct_match)) {
+ if (!ip_vs_conn_net_eq(cp, p->net))
+ continue;
+ if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
+ if (__ip_vs_conn_get(cp))
+ goto out;
+ }
continue;
}
@@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
* p->vaddr is a fwmark */
ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
p->af, p->vaddr, &cp->vaddr) &&
- p->cport == cp->cport && p->vport == cp->vport &&
+ p->vport == cp->vport && p->cport == cp->cport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
- p->protocol == cp->protocol)
- goto out;
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
+ if (__ip_vs_conn_get(cp))
+ goto out;
+ }
}
cp = NULL;
out:
- if (cp)
- atomic_inc(&cp->refcnt);
- ct_read_unlock(hash);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
ip_vs_proto_name(p->protocol),
@@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
*/
hash = ip_vs_conn_hashkey_param(p, true);
- ct_read_lock(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (cp->af == p->af &&
- p->vport == cp->cport && p->cport == cp->dport &&
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (p->vport == cp->cport && p->cport == cp->dport &&
+ cp->af == p->af &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
p->protocol == cp->protocol &&
ip_vs_conn_net_eq(cp, p->net)) {
+ if (!__ip_vs_conn_get(cp))
+ continue;
/* HIT */
- atomic_inc(&cp->refcnt);
ret = cp;
break;
}
}
- ct_read_unlock(hash);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
ip_vs_proto_name(p->protocol),
@@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
{
if (ip_vs_conn_unhash(cp)) {
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
atomic_dec(&ip_vs_conn_no_cport_cnt);
cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
cp->cport = cport;
}
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
/* hash on new dport */
ip_vs_conn_hash(cp);
@@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
return;
/* Increase the refcnt counter of the dest */
- atomic_inc(&dest->refcnt);
+ ip_vs_dest_hold(dest);
conn_flags = atomic_read(&dest->conn_flags);
if (cp->protocol != IPPROTO_UDP)
@@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
* Check if there is a destination for the connection, if so
* bind the connection to the destination.
*/
-struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
{
struct ip_vs_dest *dest;
+ rcu_read_lock();
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
struct ip_vs_proto_data *pd;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (cp->dest) {
- spin_unlock(&cp->lock);
- return dest;
+ spin_unlock_bh(&cp->lock);
+ rcu_read_unlock();
+ return;
}
/* Applications work depending on the forwarding method
@@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
ip_vs_unbind_app(cp);
ip_vs_bind_dest(cp, dest);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
/* Update its packet transmitter */
cp->packet_xmit = NULL;
@@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
if (pd && atomic_read(&pd->appcnt))
ip_vs_bind_app(cp, pd->pp);
}
- return dest;
+ rcu_read_unlock();
}
@@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
}
- /*
- * Simply decrease the refcnt of the dest, because the
- * dest will be either in service's destination list
- * or in the trash.
- */
- atomic_dec(&dest->refcnt);
+ ip_vs_dest_put(dest);
}
static int expire_quiescent_template(struct netns_ipvs *ipvs,
@@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
* Simply decrease the refcnt of the template,
* don't restart its timer.
*/
- atomic_dec(&ct->refcnt);
+ __ip_vs_conn_put(ct);
return 0;
}
return 1;
}
+static void ip_vs_conn_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn,
+ rcu_head);
+
+ ip_vs_pe_put(cp->pe);
+ kfree(cp->pe_data);
+ kmem_cache_free(ip_vs_conn_cachep, cp);
+}
+
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
struct net *net = ip_vs_conn_net(cp);
struct netns_ipvs *ipvs = net_ipvs(net);
- cp->timeout = 60*HZ;
-
- /*
- * hey, I'm using it
- */
- atomic_inc(&cp->refcnt);
-
/*
* do I control anybody?
*/
if (atomic_read(&cp->n_control))
goto expire_later;
- /*
- * unhash it if it is hashed in the conn table
- */
- if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
- goto expire_later;
-
- /*
- * refcnt==1 implies I'm the only one referrer
- */
- if (likely(atomic_read(&cp->refcnt) == 1)) {
+ /* Unlink conn if not referenced anymore */
+ if (likely(ip_vs_conn_unlink(cp))) {
/* delete the timer if it is activated by other users */
del_timer(&cp->timer);
@@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_conn_drop_conntrack(cp);
}
- ip_vs_pe_put(cp->pe);
- kfree(cp->pe_data);
if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
+ call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
atomic_dec(&ipvs->conn_count);
-
- kmem_cache_free(ip_vs_conn_cachep, cp);
return;
}
- /* hash it back to the table */
- ip_vs_conn_hash(cp);
-
expire_later:
- IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
- atomic_read(&cp->refcnt)-1,
+ IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
+ atomic_read(&cp->refcnt),
atomic_read(&cp->n_control));
+ atomic_inc(&cp->refcnt);
+ cp->timeout = 60*HZ;
+
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
ip_vs_conn_put(cp);
}
-
+/* Modify timer, so that it expires as soon as possible.
+ * Can be called without reference only if under RCU lock.
+ */
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
- if (del_timer(&cp->timer))
- mod_timer(&cp->timer, jiffies);
+ /* Using mod_timer_pending will ensure the timer is not
+ * modified after the final del_timer in ip_vs_conn_expire.
+ */
+ if (timer_pending(&cp->timer) &&
+ time_after(cp->timer.expires, jiffies))
+ mod_timer_pending(&cp->timer, jiffies);
}
@@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
p->protocol);
- cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+ cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
IP_VS_ERR_RL("%s(): no memory\n", __func__);
return NULL;
@@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->protocol = p->protocol;
- ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
+ ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
- ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
+ ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
cp->vport = p->vport;
/* proto should only be IPPROTO_IP if d_addr is a fwmark */
- ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
- &cp->daddr, daddr);
+ ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
+ &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
@@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
cp->pe = p->pe;
cp->pe_data = p->pe_data;
cp->pe_data_len = p->pe_data_len;
+ } else {
+ cp->pe = NULL;
+ cp->pe_data = NULL;
+ cp->pe_data_len = 0;
}
spin_lock_init(&cp->lock);
@@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
*/
atomic_set(&cp->refcnt, 1);
+ cp->control = NULL;
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
+ cp->packet_xmit = NULL;
+ cp->app = NULL;
+ cp->app_data = NULL;
+ /* reset struct ip_vs_seq */
+ cp->in_seq.delta = 0;
+ cp->out_seq.delta = 0;
+
atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
/* Bind the connection with a destination server */
+ cp->dest = NULL;
ip_vs_bind_dest(cp, dest);
/* Set its state and timeout */
cp->state = 0;
+ cp->old_state = 0;
cp->timeout = 3*HZ;
cp->sync_endtime = jiffies & ~3UL;
@@ -952,24 +966,29 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
struct ip_vs_iter_state *iter = seq->private;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
- ct_read_lock_bh(idx);
- hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+ /* __ip_vs_conn_get() is not needed by
+ * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
+ */
if (pos-- == 0) {
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
}
- ct_read_unlock_bh(idx);
+ rcu_read_unlock();
+ rcu_read_lock();
}
return NULL;
}
static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
struct ip_vs_iter_state *iter = seq->private;
iter->l = NULL;
+ rcu_read_lock();
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
}
@@ -977,6 +996,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
struct ip_vs_iter_state *iter = seq->private;
+ struct hlist_node *e;
struct hlist_head *l = iter->l;
int idx;
@@ -985,31 +1005,27 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return ip_vs_conn_array(seq, 0);
/* more on same hash chain? */
- if (cp->c_list.next)
- return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list);
+ e = rcu_dereference(hlist_next_rcu(&cp->c_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_conn, c_list);
idx = l - ip_vs_conn_tab;
- ct_read_unlock_bh(idx);
-
while (++idx < ip_vs_conn_tab_size) {
- ct_read_lock_bh(idx);
- hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
- ct_read_unlock_bh(idx);
+ rcu_read_unlock();
+ rcu_read_lock();
}
iter->l = NULL;
return NULL;
}
static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
{
- struct ip_vs_iter_state *iter = seq->private;
- struct hlist_head *l = iter->l;
-
- if (l)
- ct_read_unlock_bh(l - ip_vs_conn_tab);
+ rcu_read_unlock();
}
static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
@@ -1188,7 +1204,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
void ip_vs_random_dropentry(struct net *net)
{
int idx;
- struct ip_vs_conn *cp;
+ struct ip_vs_conn *cp, *cp_c;
/*
* Randomly scan 1/32 of the whole table every second
@@ -1199,9 +1215,9 @@ void ip_vs_random_dropentry(struct net *net)
/*
* Lock is actually needed in this loop.
*/
- ct_write_lock_bh(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
@@ -1228,12 +1244,15 @@ void ip_vs_random_dropentry(struct net *net)
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
- if (cp->control) {
+ cp_c = cp->control;
+ /* cp->control is valid only with reference to cp */
+ if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
- ip_vs_conn_expire_now(cp->control);
+ ip_vs_conn_expire_now(cp_c);
+ __ip_vs_conn_put(cp);
}
}
- ct_write_unlock_bh(hash);
+ rcu_read_unlock();
}
}
@@ -1244,7 +1263,7 @@ void ip_vs_random_dropentry(struct net *net)
static void ip_vs_conn_flush(struct net *net)
{
int idx;
- struct ip_vs_conn *cp;
+ struct ip_vs_conn *cp, *cp_c;
struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
@@ -1252,19 +1271,22 @@ flush_again:
/*
* Lock is actually needed in this loop.
*/
- ct_write_lock_bh(idx);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
if (!ip_vs_conn_net_eq(cp, net))
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
- if (cp->control) {
+ cp_c = cp->control;
+ /* cp->control is valid only with reference to cp */
+ if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
- ip_vs_conn_expire_now(cp->control);
+ ip_vs_conn_expire_now(cp_c);
+ __ip_vs_conn_put(cp);
}
}
- ct_write_unlock_bh(idx);
+ rcu_read_unlock();
}
/* the counter may be not NULL, because maybe some conn entries
@@ -1331,7 +1353,7 @@ int __init ip_vs_conn_init(void)
INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
- rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+ spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l);
}
/* calculate the random value for connection hash */
@@ -1342,6 +1364,8 @@ int __init ip_vs_conn_init(void)
void ip_vs_conn_cleanup(void)
{
+ /* Wait all ip_vs_conn_rcu_free() callbacks to complete */
+ rcu_barrier();
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
vfree(ip_vs_conn_tab);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 61f49d241712..23b8eb53a569 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
-int ip_vs_net_id __read_mostly;
-#ifdef IP_VS_GENERIC_NETNS
-EXPORT_SYMBOL(ip_vs_net_id);
-#endif
+static int ip_vs_net_id __read_mostly;
/* netns cnt used for uniqueness */
static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
@@ -206,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
{
ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
vport, p);
- p->pe = svc->pe;
+ p->pe = rcu_dereference(svc->pe);
if (p->pe && p->pe->fill_param)
return p->pe->fill_param(p, skb);
@@ -238,7 +235,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
- ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
+ ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
+ (__force __u32) svc->netmask);
else
#endif
snet.ip = iph->saddr.ip & svc->netmask;
@@ -299,12 +297,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/* Check if a template already exists */
ct = ip_vs_ct_in_get(&param);
if (!ct || !ip_vs_check_template(ct)) {
+ struct ip_vs_scheduler *sched;
+
/*
* No template found or the dest of the connection
* template is not available.
* return *ignored=0 i.e. ICMP and NF_DROP
*/
- dest = svc->scheduler->schedule(svc, skb);
+ sched = rcu_dereference(svc->scheduler);
+ dest = sched->schedule(svc, skb);
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
@@ -394,6 +395,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
+ struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
unsigned int flags;
@@ -449,7 +451,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return NULL;
}
- dest = svc->scheduler->schedule(svc, skb);
+ sched = rcu_dereference(svc->scheduler);
+ dest = sched->schedule(svc, skb);
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL;
@@ -507,7 +510,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL) {
- ip_vs_service_put(svc);
return NF_DROP;
}
@@ -533,8 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_CONN_F_ONE_PACKET : 0;
union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
- ip_vs_service_put(svc);
-
/* create a new connection entry */
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
@@ -571,12 +571,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
- if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
- ip_vs_service_put(svc);
+ if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
return NF_ACCEPT;
- }
-
- ip_vs_service_put(svc);
/*
* Notify the client that the destination is unreachable, and
@@ -588,9 +584,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net_ = dev_net(skb_dst(skb)->dev);
- skb->dev = net->loopback_dev;
+ skb->dev = net_->loopback_dev;
}
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
} else
@@ -643,8 +639,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- int err = ip_defrag(skb, user);
+ int err;
+ local_bh_disable();
+ err = ip_defrag(skb, user);
+ local_bh_enable();
if (!err)
ip_send_check(ip_hdr(skb));
@@ -1002,6 +1001,32 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
return th->rst;
}
+static inline bool is_new_conn(const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP: {
+ struct tcphdr _tcph, *th;
+
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return false;
+ return th->syn;
+ }
+ case IPPROTO_SCTP: {
+ sctp_chunkhdr_t *sch, schunk;
+
+ sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
+ sizeof(schunk), &schunk);
+ if (sch == NULL)
+ return false;
+ return sch->type == SCTP_CID_INIT;
+ }
+ default:
+ return false;
+ }
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@@ -1164,9 +1189,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(net, af, iph.protocol,
- &iph.saddr,
- pptr[0])) {
+ if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+ pptr[0])) {
/*
* Notify the real server: there is no
* existing entry if it is not RST
@@ -1181,9 +1205,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- struct net *net =
- dev_net(skb_dst(skb)->dev);
-
if (!skb->dev)
skb->dev = net->loopback_dev;
icmpv6_send(skb,
@@ -1226,13 +1247,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_out(hooknum, skb, AF_INET);
- local_bh_enable();
- return verdict;
+ return ip_vs_out(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1259,13 +1274,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_out(hooknum, skb, AF_INET6);
- local_bh_enable();
- return verdict;
+ return ip_vs_out(hooknum, skb, AF_INET6);
}
#endif
@@ -1401,10 +1410,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
- spin_lock(&dest->dst_lock);
- if (dest->dst_cache)
- mtu = dst_mtu(dest->dst_cache);
- spin_unlock(&dest->dst_lock);
+ struct ip_vs_dest_dst *dest_dst;
+
+ rcu_read_lock();
+ dest_dst = rcu_dereference(dest->dest_dst);
+ if (dest_dst)
+ mtu = dst_mtu(dest_dst->dst_cache);
+ rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -1430,7 +1442,8 @@ ignore_ipip:
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
- if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol ||
+ IPPROTO_SCTP == cih->protocol)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
@@ -1626,6 +1639,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if the packet belongs to an existing connection entry
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
+ is_new_conn(skb, &iph)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ }
+
if (unlikely(!cp) && !iph.fragoffs) {
/* No (second) fragments need to enter here, as nf_defrag_ipv6
* replayed fragment zero will already have created the cp
@@ -1720,13 +1742,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_in(hooknum, skb, AF_INET);
- local_bh_enable();
- return verdict;
+ return ip_vs_in(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1785,13 +1801,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_in(hooknum, skb, AF_INET6);
- local_bh_enable();
- return verdict;
+ return ip_vs_in(hooknum, skb, AF_INET6);
}
#endif
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 9e2d1cccd1eb..9e6c2a075a4c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -55,9 +55,6 @@
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);
-/* lock for service table */
-static DEFINE_RWLOCK(__ip_vs_svc_lock);
-
/* sysctl variables */
#ifdef CONFIG_IP_VS_DEBUG
@@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void)
/* Protos */
-static void __ip_vs_del_service(struct ip_vs_service *svc);
+static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
#ifdef CONFIG_IP_VS_IPV6
@@ -257,9 +254,9 @@ ip_vs_use_count_dec(void)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
/* the service table hashed by <protocol, addr, port> */
-static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
-static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
/*
@@ -271,16 +268,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
{
register unsigned int porth = ntohs(port);
__be32 addr_fold = addr->ip;
+ __u32 ahash;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
- addr_fold ^= ((size_t)net>>8);
+ ahash = ntohl(addr_fold);
+ ahash ^= ((size_t) net >> 8);
- return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
- & IP_VS_SVC_TAB_MASK;
+ return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
+ IP_VS_SVC_TAB_MASK;
}
/*
@@ -312,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
*/
hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
&svc->addr, svc->port);
- list_add(&svc->s_list, &ip_vs_svc_table[hash]);
+ hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
* Hash it by fwmark in svc_fwm_table
*/
hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
- list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+ hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
}
svc->flags |= IP_VS_SVC_F_HASHED;
@@ -342,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
if (svc->fwmark == 0) {
/* Remove it from the svc_table table */
- list_del(&svc->s_list);
+ hlist_del_rcu(&svc->s_list);
} else {
/* Remove it from the svc_fwm_table table */
- list_del(&svc->f_list);
+ hlist_del_rcu(&svc->f_list);
}
svc->flags &= ~IP_VS_SVC_F_HASHED;
@@ -367,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
- list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
@@ -394,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
/* Check for fwmark addressed entries */
hash = ip_vs_svc_fwm_hashkey(net, fwmark);
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark && svc->af == af
&& net_eq(svc->net, net)) {
/* HIT */
@@ -405,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
return NULL;
}
+/* Find service, called under RCU lock */
struct ip_vs_service *
-ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
- const union nf_inet_addr *vaddr, __be16 vport)
+ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
struct netns_ipvs *ipvs = net_ipvs(net);
- read_lock(&__ip_vs_svc_lock);
-
/*
* Check the table hashed by fwmark first
*/
@@ -449,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
}
out:
- if (svc)
- atomic_inc(&svc->usecnt);
- read_unlock(&__ip_vs_svc_lock);
-
IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
fwmark, ip_vs_proto_name(protocol),
IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
@@ -469,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
dest->svc = svc;
}
+static void ip_vs_service_free(struct ip_vs_service *svc)
+{
+ if (svc->stats.cpustats)
+ free_percpu(svc->stats.cpustats);
+ kfree(svc);
+}
+
static void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
@@ -476,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
dest->svc = NULL;
if (atomic_dec_and_test(&svc->refcnt)) {
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
+ IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
- ntohs(svc->port), atomic_read(&svc->usecnt));
- free_percpu(svc->stats.cpustats);
- kfree(svc);
+ ntohs(svc->port));
+ ip_vs_service_free(svc);
}
}
@@ -506,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
& IP_VS_RTAB_MASK;
}
-/*
- * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
- * should be called with locked tables.
- */
-static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
+/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
+static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned int hash;
- if (!list_empty(&dest->d_list)) {
- return 0;
- }
+ if (dest->in_rs_table)
+ return;
/*
* Hash by proto,addr,port,
@@ -524,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
*/
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
- list_add(&dest->d_list, &ipvs->rs_table[hash]);
-
- return 1;
+ hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
+ dest->in_rs_table = 1;
}
-/*
- * UNhashes ip_vs_dest from rs_table.
- * should be called with locked tables.
- */
-static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+/* Unhash ip_vs_dest from rs_table. */
+static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
/*
* Remove it from the rs_table table.
*/
- if (!list_empty(&dest->d_list)) {
- list_del_init(&dest->d_list);
+ if (dest->in_rs_table) {
+ hlist_del_rcu(&dest->d_list);
+ dest->in_rs_table = 0;
}
-
- return 1;
}
-/*
- * Lookup real service by <proto,addr,port> in the real service table.
- */
-struct ip_vs_dest *
-ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
- const union nf_inet_addr *daddr,
- __be16 dport)
+/* Check if real service by <proto,addr,port> is present */
+bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash;
struct ip_vs_dest *dest;
- /*
- * Check for "full" addressed entries
- * Return the first found entry
- */
+ /* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- read_lock(&ipvs->rs_lock);
- list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
- if ((dest->af == af)
- && ip_vs_addr_equal(af, &dest->addr, daddr)
- && (dest->port == dport)
- && ((dest->protocol == protocol) ||
- dest->vfwmark)) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->port == dport &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ (dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- read_unlock(&ipvs->rs_lock);
- return dest;
+ rcu_read_unlock();
+ return true;
}
}
- read_unlock(&ipvs->rs_lock);
+ rcu_read_unlock();
- return NULL;
+ return false;
}
-/*
- * Lookup destination by {addr,port} in the given service
+/* Lookup destination by {addr,port} in the given service
+ * Called under RCU lock.
*/
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
@@ -592,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
/*
* Find the destination for the given service
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if ((dest->af == svc->af)
&& ip_vs_addr_equal(svc->af, &dest->addr, daddr)
&& (dest->port == dport)) {
@@ -606,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
/*
* Find destination by {daddr,dport,vaddr,protocol}
- * Cretaed to be used in ip_vs_process_message() in
+ * Created to be used in ip_vs_process_message() in
* the backup synchronization daemon. It finds the
* destination to be bound to the received connection
* on the backup.
- *
- * ip_vs_lookup_real_service() looked promissing, but
- * seems not working as expected.
+ * Called under RCU lock, no refcnt is returned.
*/
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
const union nf_inet_addr *daddr,
@@ -625,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -633,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
dest = ip_vs_lookup_dest(svc, daddr, port);
if (!dest)
dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
- if (dest)
- atomic_inc(&dest->refcnt);
- ip_vs_service_put(svc);
return dest;
}
+void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_dest_dst *dest_dst = container_of(head,
+ struct ip_vs_dest_dst,
+ rcu_head);
+
+ dst_release(dest_dst->dst_cache);
+ kfree(dest_dst);
+}
+
+/* Release dest_dst and dst_cache for dest in user context */
+static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
+{
+ struct ip_vs_dest_dst *old;
+
+ old = rcu_dereference_protected(dest->dest_dst, 1);
+ if (old) {
+ RCU_INIT_POINTER(dest->dest_dst, NULL);
+ call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
+ }
+}
+
/*
* Lookup dest by {svc,addr,port} in the destination trash.
* The destination trash is used to hold the destinations that are removed
@@ -653,19 +653,25 @@ static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
__be16 dport)
{
- struct ip_vs_dest *dest, *nxt;
+ struct ip_vs_dest *dest;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
/*
* Find the destination in trash
*/
- list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
+ /* We can not reuse dest while in grace period
+ * because conns still can use dest->svc
+ */
+ if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
+ continue;
if (dest->af == svc->af &&
ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -675,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
(ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
dest->vport == svc->port))) {
/* HIT */
- return dest;
- }
-
- /*
- * Try to purge the destination from trash if not referenced
- */
- if (atomic_read(&dest->refcnt) == 1) {
- IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
- "from trash\n",
- dest->vfwmark,
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
- ntohs(dest->port));
- list_del(&dest->n_list);
- ip_vs_dst_reset(dest);
- __ip_vs_unbind_svc(dest);
- free_percpu(dest->stats.cpustats);
- kfree(dest);
+ list_del(&dest->t_list);
+ ip_vs_dest_hold(dest);
+ goto out;
}
}
- return NULL;
+ dest = NULL;
+
+out:
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+
+ return dest;
}
+static void ip_vs_dest_free(struct ip_vs_dest *dest)
+{
+ __ip_vs_dst_cache_reset(dest);
+ __ip_vs_unbind_svc(dest);
+ free_percpu(dest->stats.cpustats);
+ kfree(dest);
+}
/*
* Clean up all the destinations in the trash
@@ -706,19 +710,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* When the ip_vs_control_clearup is activated by ipvs module exit,
* the service tables must have been flushed and all the connections
* are expired, and the refcnt of each destination in the trash must
- * be 1, so we simply release them here.
+ * be 0, so we simply release them here.
*/
static void ip_vs_trash_cleanup(struct net *net)
{
struct ip_vs_dest *dest, *nxt;
struct netns_ipvs *ipvs = net_ipvs(net);
- list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
- list_del(&dest->n_list);
- ip_vs_dst_reset(dest);
- __ip_vs_unbind_svc(dest);
- free_percpu(dest->stats.cpustats);
- kfree(dest);
+ del_timer_sync(&ipvs->dest_trash_timer);
+ /* No need to use dest_trash_lock */
+ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
+ list_del(&dest->t_list);
+ ip_vs_dest_free(dest);
}
}
@@ -768,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct ip_vs_scheduler *sched;
int conn_flags;
/* set the weight and the flags */
@@ -783,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Put the real service in rs_table if not present.
* For now only for NAT!
*/
- write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_hash(ipvs, dest);
- write_unlock_bh(&ipvs->rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -809,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->l_threshold = udest->l_threshold;
spin_lock_bh(&dest->dst_lock);
- ip_vs_dst_reset(dest);
+ __ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
- if (add)
- ip_vs_start_estimator(svc->net, &dest->stats);
-
- write_lock_bh(&__ip_vs_svc_lock);
-
- /* Wait until all other svc users go away */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-
+ sched = rcu_dereference_protected(svc->scheduler, 1);
if (add) {
- list_add(&dest->n_list, &svc->destinations);
+ ip_vs_start_estimator(svc->net, &dest->stats);
+ list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
+ if (sched->add_dest)
+ sched->add_dest(svc, dest);
+ } else {
+ if (sched->upd_dest)
+ sched->upd_dest(svc, dest);
}
-
- /* call the update_service, because server weight may be changed */
- if (svc->scheduler->update_service)
- svc->scheduler->update_service(svc);
-
- write_unlock_bh(&__ip_vs_svc_lock);
}
@@ -881,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->persistconns, 0);
atomic_set(&dest->refcnt, 1);
- INIT_LIST_HEAD(&dest->d_list);
+ INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock);
spin_lock_init(&dest->stats.lock);
__ip_vs_update_dest(svc, dest, udest, 1);
@@ -923,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
- /*
- * Check if the dest already exists in the list
- */
+ /* We use function that requires RCU lock */
+ rcu_read_lock();
dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ rcu_read_unlock();
if (dest != NULL) {
IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
@@ -948,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
ntohs(dest->vport));
- /*
- * Get the destination from the trash
- */
- list_del(&dest->n_list);
-
__ip_vs_update_dest(svc, dest, udest, 1);
ret = 0;
} else {
@@ -992,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
- /*
- * Lookup the destination list
- */
+ /* We use function that requires RCU lock */
+ rcu_read_lock();
dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ rcu_read_unlock();
if (dest == NULL) {
IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
@@ -1008,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return 0;
}
+static void ip_vs_dest_wait_readers(struct rcu_head *head)
+{
+ struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
+ rcu_head);
+
+ /* End of grace period after unlinking */
+ clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
+}
+
/*
* Delete a destination (must be already unlinked from the service)
*/
-static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
+ bool cleanup)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -1021,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
/*
* Remove it from the d-linked list with the real services.
*/
- write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_unhash(dest);
- write_unlock_bh(&ipvs->rs_lock);
- /*
- * Decrease the refcnt of the dest, and free the dest
- * if nobody refers to it (refcnt=0). Otherwise, throw
- * the destination into the trash.
- */
- if (atomic_dec_and_test(&dest->refcnt)) {
- IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
- dest->vfwmark,
- IP_VS_DBG_ADDR(dest->af, &dest->addr),
- ntohs(dest->port));
- ip_vs_dst_reset(dest);
- /* simply decrease svc->refcnt here, let the caller check
- and release the service if nobody refers to it.
- Only user context can release destination and service,
- and only one user context can update virtual service at a
- time, so the operation here is OK */
- atomic_dec(&dest->svc->refcnt);
- free_percpu(dest->stats.cpustats);
- kfree(dest);
- } else {
- IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
- "dest->refcnt=%d\n",
- IP_VS_DBG_ADDR(dest->af, &dest->addr),
- ntohs(dest->port),
- atomic_read(&dest->refcnt));
- list_add(&dest->n_list, &ipvs->dest_trash);
- atomic_inc(&dest->refcnt);
+ if (!cleanup) {
+ set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
+ call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
}
+
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
+ atomic_read(&dest->refcnt));
+ if (list_empty(&ipvs->dest_trash) && !cleanup)
+ mod_timer(&ipvs->dest_trash_timer,
+ jiffies + IP_VS_DEST_TRASH_PERIOD);
+ /* dest lives in trash without reference */
+ list_add(&dest->t_list, &ipvs->dest_trash);
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+ ip_vs_dest_put(dest);
}
@@ -1068,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
/*
* Remove it from the d-linked destination list.
*/
- list_del(&dest->n_list);
+ list_del_rcu(&dest->n_list);
svc->num_dests--;
- /*
- * Call the update_service function of its scheduler
- */
- if (svcupd && svc->scheduler->update_service)
- svc->scheduler->update_service(svc);
+ if (svcupd) {
+ struct ip_vs_scheduler *sched;
+
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched->del_dest)
+ sched->del_dest(svc, dest);
+ }
}
@@ -1090,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
EnterFunction(2);
+ /* We use function that requires RCU lock */
+ rcu_read_lock();
dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+ rcu_read_unlock();
if (dest == NULL) {
IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
return -ENOENT;
}
- write_lock_bh(&__ip_vs_svc_lock);
-
- /*
- * Wait until all other svc users go away.
- */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-
/*
* Unlink dest from the service
*/
__ip_vs_unlink_dest(svc, dest, 1);
- write_unlock_bh(&__ip_vs_svc_lock);
-
/*
* Delete the destination
*/
- __ip_vs_del_dest(svc->net, dest);
+ __ip_vs_del_dest(svc->net, dest, false);
LeaveFunction(2);
return 0;
}
+static void ip_vs_dest_trash_expire(unsigned long data)
+{
+ struct net *net = (struct net *) data;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_dest *dest, *next;
+
+ spin_lock(&ipvs->dest_trash_lock);
+ list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
+ /* Skip if dest is in grace period */
+ if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
+ continue;
+ if (atomic_read(&dest->refcnt) > 0)
+ continue;
+ IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
+ ntohs(dest->port));
+ list_del(&dest->t_list);
+ ip_vs_dest_free(dest);
+ }
+ if (!list_empty(&ipvs->dest_trash))
+ mod_timer(&ipvs->dest_trash_timer,
+ jiffies + IP_VS_DEST_TRASH_PERIOD);
+ spin_unlock(&ipvs->dest_trash_lock);
+}
/*
* Add a service into the service hash table
@@ -1157,9 +1164,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
}
#ifdef CONFIG_IP_VS_IPV6
- if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
- ret = -EINVAL;
- goto out_err;
+ if (u->af == AF_INET6) {
+ __u32 plen = (__force __u32) u->netmask;
+
+ if (plen < 1 || plen > 128) {
+ ret = -EINVAL;
+ goto out_err;
+ }
}
#endif
@@ -1176,7 +1187,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
}
/* I'm the first user of the service */
- atomic_set(&svc->usecnt, 0);
atomic_set(&svc->refcnt, 0);
svc->af = u->af;
@@ -1190,7 +1200,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
svc->net = net;
INIT_LIST_HEAD(&svc->destinations);
- rwlock_init(&svc->sched_lock);
+ spin_lock_init(&svc->sched_lock);
spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
@@ -1200,7 +1210,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
sched = NULL;
/* Bind the ct retriever */
- ip_vs_bind_pe(svc, pe);
+ RCU_INIT_POINTER(svc->pe, pe);
pe = NULL;
/* Update the virtual service counters */
@@ -1216,9 +1226,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
ipvs->num_services++;
/* Hash the service into the service table */
- write_lock_bh(&__ip_vs_svc_lock);
ip_vs_svc_hash(svc);
- write_unlock_bh(&__ip_vs_svc_lock);
*svc_p = svc;
/* Now there is a service - full throttle */
@@ -1228,15 +1236,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
out_err:
if (svc != NULL) {
- ip_vs_unbind_scheduler(svc);
- if (svc->inc) {
- local_bh_disable();
- ip_vs_app_inc_put(svc->inc);
- local_bh_enable();
- }
- if (svc->stats.cpustats)
- free_percpu(svc->stats.cpustats);
- kfree(svc);
+ ip_vs_unbind_scheduler(svc, sched);
+ ip_vs_service_free(svc);
}
ip_vs_scheduler_put(sched);
ip_vs_pe_put(pe);
@@ -1280,18 +1281,27 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
}
#ifdef CONFIG_IP_VS_IPV6
- if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
- ret = -EINVAL;
- goto out;
+ if (u->af == AF_INET6) {
+ __u32 plen = (__force __u32) u->netmask;
+
+ if (plen < 1 || plen > 128) {
+ ret = -EINVAL;
+ goto out;
+ }
}
#endif
- write_lock_bh(&__ip_vs_svc_lock);
-
- /*
- * Wait until all other svc users go away.
- */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+ old_sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched != old_sched) {
+ /* Bind the new scheduler */
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret) {
+ old_sched = sched;
+ goto out;
+ }
+ /* Unbind the old scheduler on success */
+ ip_vs_unbind_scheduler(svc, old_sched);
+ }
/*
* Set the flags and timeout value
@@ -1300,57 +1310,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
- old_sched = svc->scheduler;
- if (sched != old_sched) {
- /*
- * Unbind the old scheduler
- */
- if ((ret = ip_vs_unbind_scheduler(svc))) {
- old_sched = sched;
- goto out_unlock;
- }
+ old_pe = rcu_dereference_protected(svc->pe, 1);
+ if (pe != old_pe)
+ rcu_assign_pointer(svc->pe, pe);
- /*
- * Bind the new scheduler
- */
- if ((ret = ip_vs_bind_scheduler(svc, sched))) {
- /*
- * If ip_vs_bind_scheduler fails, restore the old
- * scheduler.
- * The main reason of failure is out of memory.
- *
- * The question is if the old scheduler can be
- * restored all the time. TODO: if it cannot be
- * restored some time, we must delete the service,
- * otherwise the system may crash.
- */
- ip_vs_bind_scheduler(svc, old_sched);
- old_sched = sched;
- goto out_unlock;
- }
- }
-
- old_pe = svc->pe;
- if (pe != old_pe) {
- ip_vs_unbind_pe(svc);
- ip_vs_bind_pe(svc, pe);
- }
-
-out_unlock:
- write_unlock_bh(&__ip_vs_svc_lock);
out:
ip_vs_scheduler_put(old_sched);
ip_vs_pe_put(old_pe);
return ret;
}
+static void ip_vs_service_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_service *svc;
+
+ svc = container_of(head, struct ip_vs_service, rcu_head);
+ ip_vs_service_free(svc);
+}
/*
* Delete a service from the service list
* - The service must be unlinked, unlocked and not referenced!
* - We are called under _bh lock
*/
-static void __ip_vs_del_service(struct ip_vs_service *svc)
+static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
{
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
@@ -1366,27 +1349,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
ip_vs_stop_estimator(svc->net, &svc->stats);
/* Unbind scheduler */
- old_sched = svc->scheduler;
- ip_vs_unbind_scheduler(svc);
+ old_sched = rcu_dereference_protected(svc->scheduler, 1);
+ ip_vs_unbind_scheduler(svc, old_sched);
ip_vs_scheduler_put(old_sched);
- /* Unbind persistence engine */
- old_pe = svc->pe;
- ip_vs_unbind_pe(svc);
+ /* Unbind persistence engine, keep svc->pe */
+ old_pe = rcu_dereference_protected(svc->pe, 1);
ip_vs_pe_put(old_pe);
- /* Unbind app inc */
- if (svc->inc) {
- ip_vs_app_inc_put(svc->inc);
- svc->inc = NULL;
- }
-
/*
* Unlink the whole destination list
*/
list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
__ip_vs_unlink_dest(svc, dest, 0);
- __ip_vs_del_dest(svc->net, dest);
+ __ip_vs_del_dest(svc->net, dest, cleanup);
}
/*
@@ -1400,13 +1376,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
/*
* Free the service if nobody refers to it
*/
- if (atomic_read(&svc->refcnt) == 0) {
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
+ if (atomic_dec_and_test(&svc->refcnt)) {
+ IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
- ntohs(svc->port), atomic_read(&svc->usecnt));
- free_percpu(svc->stats.cpustats);
- kfree(svc);
+ ntohs(svc->port));
+ call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
}
/* decrease the module use count */
@@ -1416,23 +1391,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
/*
* Unlink a service from list and try to delete it if its refcnt reached 0
*/
-static void ip_vs_unlink_service(struct ip_vs_service *svc)
+static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
+ /* Hold svc to avoid double release from dest_trash */
+ atomic_inc(&svc->refcnt);
/*
* Unhash it from the service table
*/
- write_lock_bh(&__ip_vs_svc_lock);
-
ip_vs_svc_unhash(svc);
- /*
- * Wait until all the svc users go away.
- */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-
- __ip_vs_del_service(svc);
-
- write_unlock_bh(&__ip_vs_svc_lock);
+ __ip_vs_del_service(svc, cleanup);
}
/*
@@ -1442,7 +1410,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
{
if (svc == NULL)
return -EEXIST;
- ip_vs_unlink_service(svc);
+ ip_vs_unlink_service(svc, false);
return 0;
}
@@ -1451,19 +1419,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(struct net *net)
+static int ip_vs_flush(struct net *net, bool cleanup)
{
int idx;
- struct ip_vs_service *svc, *nxt;
+ struct ip_vs_service *svc;
+ struct hlist_node *n;
/*
* Flush the service table hashed by <netns,protocol,addr,port>
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
- s_list) {
+ hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
+ s_list) {
if (net_eq(svc->net, net))
- ip_vs_unlink_service(svc);
+ ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1471,10 +1440,10 @@ static int ip_vs_flush(struct net *net)
* Flush the service table hashed by fwmark
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry_safe(svc, nxt,
- &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
+ f_list) {
if (net_eq(svc->net, net))
- ip_vs_unlink_service(svc);
+ ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1490,32 +1459,32 @@ void ip_vs_service_net_cleanup(struct net *net)
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(net);
+ ip_vs_flush(net, true);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
-/*
- * Release dst hold by dst_cache
- */
+
+/* Put all references for device (dst_cache) */
static inline void
-__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
+ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
+ struct ip_vs_dest_dst *dest_dst;
+
spin_lock_bh(&dest->dst_lock);
- if (dest->dst_cache && dest->dst_cache->dev == dev) {
+ dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
+ if (dest_dst && dest_dst->dst_cache->dev == dev) {
IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- ip_vs_dst_reset(dest);
+ __ip_vs_dst_cache_reset(dest);
}
spin_unlock_bh(&dest->dst_lock);
}
-/*
- * Netdev event receiver
- * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
- * a device that is "unregister" it must be released.
+/* Netdev event receiver
+ * Currently only NETDEV_DOWN is handled to release refs to cached dsts
*/
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
void *ptr)
@@ -1527,35 +1496,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
struct ip_vs_dest *dest;
unsigned int idx;
- if (event != NETDEV_UNREGISTER || !ipvs)
+ if (event != NETDEV_DOWN || !ipvs)
return NOTIFY_DONE;
IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
EnterFunction(2);
mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net)) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
- __ip_vs_dev_reset(dest, dev);
+ ip_vs_forget_dev(dest, dev);
}
}
}
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
if (net_eq(svc->net, net)) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
- __ip_vs_dev_reset(dest, dev);
+ ip_vs_forget_dev(dest, dev);
}
}
}
}
- list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
- __ip_vs_dev_reset(dest, dev);
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
+ ip_vs_forget_dev(dest, dev);
}
+ spin_unlock_bh(&ipvs->dest_trash_lock);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
return NOTIFY_DONE;
@@ -1568,12 +1539,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
{
struct ip_vs_dest *dest;
- write_lock_bh(&__ip_vs_svc_lock);
list_for_each_entry(dest, &svc->destinations, n_list) {
ip_vs_zero_stats(&dest->stats);
}
ip_vs_zero_stats(&svc->stats);
- write_unlock_bh(&__ip_vs_svc_lock);
return 0;
}
@@ -1583,14 +1552,14 @@ static int ip_vs_zero_all(struct net *net)
struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net))
ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
if (net_eq(svc->net, net))
ip_vs_zero_service(svc);
}
@@ -1918,7 +1887,7 @@ static struct ctl_table vs_vars[] = {
struct ip_vs_iter {
struct seq_net_private p; /* Do not move this, netns depends upon it*/
- struct list_head *table;
+ struct hlist_head *table;
int bucket;
};
@@ -1951,7 +1920,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_table;
iter->bucket = idx;
@@ -1962,7 +1931,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* keep looking in fwmark */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
+ f_list) {
if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx;
@@ -1975,17 +1945,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
}
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
-__acquires(__ip_vs_svc_lock)
+ __acquires(RCU)
{
-
- read_lock_bh(&__ip_vs_svc_lock);
+ rcu_read_lock();
return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct list_head *e;
+ struct hlist_node *e;
struct ip_vs_iter *iter;
struct ip_vs_service *svc;
@@ -1998,13 +1967,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (iter->table == ip_vs_svc_table) {
/* next service in table hashed by protocol */
- if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
- return list_entry(e, struct ip_vs_service, s_list);
-
+ e = rcu_dereference(hlist_next_rcu(&svc->s_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_service, s_list);
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
- list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
- s_list) {
+ hlist_for_each_entry_rcu(svc,
+ &ip_vs_svc_table[iter->bucket],
+ s_list) {
return svc;
}
}
@@ -2015,13 +1985,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
/* next service in hashed by fwmark */
- if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
- return list_entry(e, struct ip_vs_service, f_list);
+ e = rcu_dereference(hlist_next_rcu(&svc->f_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_service, f_list);
scan_fwmark:
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
- f_list)
+ hlist_for_each_entry_rcu(svc,
+ &ip_vs_svc_fwm_table[iter->bucket],
+ f_list)
return svc;
}
@@ -2029,9 +2001,9 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
-__releases(__ip_vs_svc_lock)
+ __releases(RCU)
{
- read_unlock_bh(&__ip_vs_svc_lock);
+ rcu_read_unlock();
}
@@ -2049,6 +2021,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
+ struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
@@ -2057,18 +2030,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
ip_vs_proto_name(svc->protocol),
&svc->addr.in6,
ntohs(svc->port),
- svc->scheduler->name);
+ sched->name);
else
#endif
seq_printf(seq, "%s %08X:%04X %s %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip),
ntohs(svc->port),
- svc->scheduler->name,
+ sched->name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} else {
seq_printf(seq, "FWM %08X %s %s",
- svc->fwmark, svc->scheduler->name,
+ svc->fwmark, sched->name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
}
@@ -2079,7 +2052,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
else
seq_putc(seq, '\n');
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
if (dest->af == AF_INET6)
seq_printf(seq,
@@ -2173,7 +2146,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
- struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
+ struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
struct ip_vs_stats_user rates;
int i;
@@ -2389,7 +2362,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush(net);
+ ret = ip_vs_flush(net, false);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
@@ -2424,11 +2397,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
/* Lookup the exact service by <protocol, addr, port> or fwmark */
+ rcu_read_lock();
if (usvc.fwmark == 0)
svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
+ rcu_read_unlock();
if (cmd != IP_VS_SO_SET_ADD
&& (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2480,11 +2455,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
+ struct ip_vs_scheduler *sched;
+
+ sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol;
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
+ strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
@@ -2503,7 +2481,7 @@ __ip_vs_get_service_entries(struct net *net,
int ret = 0;
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
@@ -2522,7 +2500,7 @@ __ip_vs_get_service_entries(struct net *net,
}
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
@@ -2551,17 +2529,20 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
union nf_inet_addr addr = { .ip = get->addr };
int ret = 0;
+ rcu_read_lock();
if (get->fwmark)
svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
else
svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
get->port);
+ rcu_read_unlock();
if (svc) {
int count = 0;
struct ip_vs_dest *dest;
struct ip_vs_dest_entry entry;
+ memset(&entry, 0, sizeof(entry));
list_for_each_entry(dest, &svc->destinations, n_list) {
if (count >= get->num_dests)
break;
@@ -2738,12 +2719,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
entry = (struct ip_vs_service_entry *)arg;
addr.ip = entry->addr;
+ rcu_read_lock();
if (entry->fwmark)
svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
else
svc = __ip_vs_service_find(net, AF_INET,
entry->protocol, &addr,
entry->port);
+ rcu_read_unlock();
if (svc) {
ip_vs_copy_service(entry, svc);
if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2900,6 +2883,8 @@ nla_put_failure:
static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct ip_vs_service *svc)
{
+ struct ip_vs_scheduler *sched;
+ struct ip_vs_pe *pe;
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
@@ -2916,16 +2901,17 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
} else {
if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
- nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
+ nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
goto nla_put_failure;
}
- if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
- (svc->pe &&
- nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ pe = rcu_dereference_protected(svc->pe, 1);
+ if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
+ (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
- nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
+ nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
goto nla_put_failure;
@@ -2971,7 +2957,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
- list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2982,7 +2968,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
}
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -3038,15 +3024,17 @@ static int ip_vs_genl_parse_service(struct net *net,
} else {
usvc->protocol = nla_get_u16(nla_protocol);
nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
- usvc->port = nla_get_u16(nla_port);
+ usvc->port = nla_get_be16(nla_port);
usvc->fwmark = 0;
}
+ rcu_read_lock();
if (usvc->fwmark)
svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
else
svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
+ rcu_read_unlock();
*ret_svc = svc;
/* If a full entry was requested, check for the additional fields */
@@ -3076,7 +3064,7 @@ static int ip_vs_genl_parse_service(struct net *net,
usvc->sched_name = nla_data(nla_sched);
usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
usvc->timeout = nla_get_u32(nla_timeout);
- usvc->netmask = nla_get_u32(nla_netmask);
+ usvc->netmask = nla_get_be32(nla_netmask);
}
return 0;
@@ -3102,7 +3090,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
return -EMSGSIZE;
if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
- nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
+ nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
(atomic_read(&dest->conn_flags) &
IP_VS_CONN_F_FWD_MASK)) ||
@@ -3211,7 +3199,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
memset(udest, 0, sizeof(*udest));
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
- udest->port = nla_get_u16(nla_port);
+ udest->port = nla_get_be16(nla_port);
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
@@ -3236,8 +3224,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
return 0;
}
-static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
- const char *mcast_ifn, __be32 syncid)
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
+ const char *mcast_ifn, __u32 syncid)
{
struct nlattr *nl_daemon;
@@ -3258,8 +3246,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
- const char *mcast_ifn, __be32 syncid,
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
+ const char *mcast_ifn, __u32 syncid,
struct netlink_callback *cb)
{
void *hdr;
@@ -3398,7 +3386,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush(net);
+ ret = ip_vs_flush(net, false);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
ret = ip_vs_genl_set_config(net, info->attrs);
@@ -3790,13 +3778,14 @@ int __net_init ip_vs_control_net_init(struct net *net)
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
- rwlock_init(&ipvs->rs_lock);
-
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
- INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+ INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
INIT_LIST_HEAD(&ipvs->dest_trash);
+ spin_lock_init(&ipvs->dest_trash_lock);
+ setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
+ (unsigned long) net);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
@@ -3826,6 +3815,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ /* Some dest can be in grace period even before cleanup, we have to
+ * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
+ */
+ rcu_barrier();
ip_vs_trash_cleanup(net);
ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
@@ -3871,10 +3864,10 @@ int __init ip_vs_control_init(void)
EnterFunction(2);
- /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
+ /* Initialize svc_table, ip_vs_svc_fwm_table */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
- INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+ INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
+ INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
}
smp_wmb(); /* Do we really need it now ? */
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 7f3b0cc00b7a..ccab120df45e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -51,7 +51,7 @@
* IPVS DH bucket
*/
struct ip_vs_dh_bucket {
- struct ip_vs_dest *dest; /* real server (cache) */
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
};
/*
@@ -64,6 +64,10 @@ struct ip_vs_dh_bucket {
#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
+struct ip_vs_dh_state {
+ struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
+ struct rcu_head rcu_head;
+};
/*
* Returns hash value for IPVS DH entry
@@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
* Get ip_vs_dest associated with supplied parameters.
*/
static inline struct ip_vs_dest *
-ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
- const union nf_inet_addr *addr)
+ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
{
- return (tbl[ip_vs_dh_hashkey(af, addr)]).dest;
+ return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
}
@@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
* Assign all the hash buckets of the specified table with the service.
*/
static int
-ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
+ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
{
int i;
struct ip_vs_dh_bucket *b;
struct list_head *p;
struct ip_vs_dest *dest;
+ bool empty;
- b = tbl;
+ b = &s->buckets[0];
p = &svc->destinations;
+ empty = list_empty(p);
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
- if (list_empty(p)) {
- b->dest = NULL;
- } else {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest)
+ ip_vs_dest_put(dest);
+ if (empty)
+ RCU_INIT_POINTER(b->dest, NULL);
+ else {
if (p == &svc->destinations)
p = p->next;
dest = list_entry(p, struct ip_vs_dest, n_list);
- atomic_inc(&dest->refcnt);
- b->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(b->dest, dest);
p = p->next;
}
@@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
/*
* Flush all the hash buckets of the specified table.
*/
-static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
+static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
{
int i;
struct ip_vs_dh_bucket *b;
+ struct ip_vs_dest *dest;
- b = tbl;
+ b = &s->buckets[0];
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
- if (b->dest) {
- atomic_dec(&b->dest->refcnt);
- b->dest = NULL;
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest) {
+ ip_vs_dest_put(dest);
+ RCU_INIT_POINTER(b->dest, NULL);
}
b++;
}
@@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
{
- struct ip_vs_dh_bucket *tbl;
+ struct ip_vs_dh_state *s;
/* allocate the DH table for this service */
- tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
- GFP_KERNEL);
- if (tbl == NULL)
+ s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
+ if (s == NULL)
return -ENOMEM;
- svc->sched_data = tbl;
+ svc->sched_data = s;
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
"current service\n",
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
- /* assign the hash buckets with the updated service */
- ip_vs_dh_assign(tbl, svc);
+ /* assign the hash buckets with current dests */
+ ip_vs_dh_reassign(s, svc);
return 0;
}
-static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
+static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
{
- struct ip_vs_dh_bucket *tbl = svc->sched_data;
+ struct ip_vs_dh_state *s = svc->sched_data;
/* got to clean up hash buckets here */
- ip_vs_dh_flush(tbl);
+ ip_vs_dh_flush(s);
/* release the table itself */
- kfree(svc->sched_data);
+ kfree_rcu(s, rcu_head);
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
-
- return 0;
}
-static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
+static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
{
- struct ip_vs_dh_bucket *tbl = svc->sched_data;
-
- /* got to clean up hash buckets here */
- ip_vs_dh_flush(tbl);
+ struct ip_vs_dh_state *s = svc->sched_data;
/* assign the hash buckets with the updated service */
- ip_vs_dh_assign(tbl, svc);
+ ip_vs_dh_reassign(s, svc);
return 0;
}
@@ -212,19 +217,20 @@ static struct ip_vs_dest *
ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
- struct ip_vs_dh_bucket *tbl;
+ struct ip_vs_dh_state *s;
struct ip_vs_iphdr iph;
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
- dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr);
+ s = (struct ip_vs_dh_state *) svc->sched_data;
+ dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
if (!dest
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
|| is_overloaded(dest)) {
+ ip_vs_scheduler_err(svc, "no destination available");
return NULL;
}
@@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
.n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
.init_service = ip_vs_dh_init_svc,
.done_service = ip_vs_dh_done_svc,
- .update_service = ip_vs_dh_update_svc,
+ .add_dest = ip_vs_dh_dest_changed,
+ .del_dest = ip_vs_dh_dest_changed,
.schedule = ip_vs_dh_schedule,
};
@@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void)
static void __exit ip_vs_dh_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 0fac6017b6fb..6bee6d0c73a5 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -56,7 +56,7 @@
* Make a summary from each cpu
*/
static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
- struct ip_vs_cpu_stats *stats)
+ struct ip_vs_cpu_stats __percpu *stats)
{
int i;
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 4f53a5f04437..77c173282f38 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
+ rcu_read_lock();
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start-data, end-start,
buf, buf_len);
+ rcu_read_unlock();
if (ret) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
@@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void)
int rv;
rv = register_pernet_subsys(&ip_vs_ftp_ops);
+ /* rcu_barrier() is called by netns on error */
return rv;
}
@@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void)
static void __exit ip_vs_ftp_exit(void)
{
unregister_pernet_subsys(&ip_vs_ftp_ops);
+ /* rcu_barrier() is called by netns */
}
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index fdd89b9564ea..5ea26bd87743 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -90,11 +90,12 @@
* IP address and its destination server
*/
struct ip_vs_lblc_entry {
- struct list_head list;
+ struct hlist_node list;
int af; /* address family */
union nf_inet_addr addr; /* destination IP address */
- struct ip_vs_dest *dest; /* real server (cache) */
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
unsigned long lastuse; /* last used time */
+ struct rcu_head rcu_head;
};
@@ -102,12 +103,14 @@ struct ip_vs_lblc_entry {
* IPVS lblc hash table
*/
struct ip_vs_lblc_table {
- struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
+ struct rcu_head rcu_head;
+ struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
+ struct timer_list periodic_timer; /* collect stale entries */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
- struct timer_list periodic_timer; /* collect stale entries */
int rover; /* rover for expire check */
int counter; /* counter for no expire */
+ bool dead;
};
@@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = {
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
- list_del(&en->list);
+ struct ip_vs_dest *dest;
+
+ hlist_del_rcu(&en->list);
/*
* We don't kfree dest because it is referred either by its service
* or the trash dest list.
*/
- atomic_dec(&en->dest->refcnt);
- kfree(en);
+ dest = rcu_dereference_protected(en->dest, 1);
+ ip_vs_dest_put(dest);
+ kfree_rcu(en, rcu_head);
}
@@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr);
- list_add(&en->list, &tbl->bucket[hash]);
+ hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
}
-/*
- * Get ip_vs_lblc_entry associated with supplied parameters. Called under read
- * lock
- */
+/* Get ip_vs_lblc_entry associated with supplied parameters. */
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
const union nf_inet_addr *addr)
@@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
unsigned int hash = ip_vs_lblc_hashkey(af, addr);
struct ip_vs_lblc_entry *en;
- list_for_each_entry(en, &tbl->bucket[hash], list)
+ hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
if (ip_vs_addr_equal(af, &en->addr, addr))
return en;
@@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
/*
* Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
- * address to a server. Called under write lock.
+ * address to a server. Called under spin lock.
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
@@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
ip_vs_addr_copy(dest->af, &en->addr, daddr);
en->lastuse = jiffies;
- atomic_inc(&dest->refcnt);
- en->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(en->dest, dest);
ip_vs_lblc_hash(tbl, en);
- } else if (en->dest != dest) {
- atomic_dec(&en->dest->refcnt);
- atomic_inc(&dest->refcnt);
- en->dest = dest;
+ } else {
+ struct ip_vs_dest *old_dest;
+
+ old_dest = rcu_dereference_protected(en->dest, 1);
+ if (old_dest != dest) {
+ ip_vs_dest_put(old_dest);
+ ip_vs_dest_hold(dest);
+ /* No ordering constraints for refcnt */
+ RCU_INIT_POINTER(en->dest, dest);
+ }
}
return en;
@@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
/*
* Flush all the entries of the specified table.
*/
-static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
+static void ip_vs_lblc_flush(struct ip_vs_service *svc)
{
- struct ip_vs_lblc_entry *en, *nxt;
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
int i;
+ spin_lock_bh(&svc->sched_lock);
+ tbl->dead = 1;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
- list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
}
+ spin_unlock_bh(&svc->sched_lock);
}
static int sysctl_lblc_expiration(struct ip_vs_service *svc)
@@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc)
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
struct ip_vs_lblc_table *tbl = svc->sched_data;
- struct ip_vs_lblc_entry *en, *nxt;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
unsigned long now = jiffies;
int i, j;
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse +
sysctl_lblc_expiration(svc)))
@@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
@@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
unsigned long now = jiffies;
int goal;
int i, j;
- struct ip_vs_lblc_entry *en, *nxt;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
@@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
@@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
atomic_dec(&tbl->entries);
goal--;
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
@@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
* Initialize the hash buckets
*/
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
- INIT_LIST_HEAD(&tbl->bucket[i]);
+ INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
+ tbl->dead = 0;
/*
* Hook periodic timer for garbage collection
@@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
}
-static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)
{
struct ip_vs_lblc_table *tbl = svc->sched_data;
@@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
del_timer_sync(&tbl->periodic_timer);
/* got to clean up table entries here */
- ip_vs_lblc_flush(tbl);
+ ip_vs_lblc_flush(svc);
/* release the table itself */
- kfree(tbl);
+ kfree_rcu(tbl, rcu_head);
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
sizeof(*tbl));
-
- return 0;
}
@@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
* The server with weight=0 is quiesced and will not receive any
* new connection.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
if (atomic_read(&dest->weight) > 0) {
@@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
struct ip_vs_dest *d;
- list_for_each_entry(d, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(d, &svc->destinations, n_list) {
if (atomic_read(&d->activeconns)*2
< atomic_read(&d->weight)) {
return 1;
@@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
/* First look in our cache */
- read_lock(&svc->sched_lock);
en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);
if (en) {
/* We only hold a read lock, but this is atomic */
@@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* free up entries from the trash at any time.
*/
- if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
- dest = en->dest;
+ dest = rcu_dereference(en->dest);
+ if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
+ atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+ goto out;
}
- read_unlock(&svc->sched_lock);
-
- /* If the destination has a weight and is not overloaded, use it */
- if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
- goto out;
/* No cache entry or it is invalid, time to schedule */
dest = __ip_vs_lblc_schedule(svc);
@@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
/* If we fail to create a cache entry, we'll just use the valid dest */
- write_lock(&svc->sched_lock);
- ip_vs_lblc_new(tbl, &iph.daddr, dest);
- write_unlock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_lblc_new(tbl, &iph.daddr, dest);
+ spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
@@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c03b6a3ade2f..50123c2ab484 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,40 +89,44 @@
*/
struct ip_vs_dest_set_elem {
struct list_head list; /* list link */
- struct ip_vs_dest *dest; /* destination server */
+ struct ip_vs_dest __rcu *dest; /* destination server */
+ struct rcu_head rcu_head;
};
struct ip_vs_dest_set {
atomic_t size; /* set size */
unsigned long lastmod; /* last modified time */
struct list_head list; /* destination list */
- rwlock_t lock; /* lock for this list */
};
-static struct ip_vs_dest_set_elem *
-ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
+ struct ip_vs_dest *dest, bool check)
{
struct ip_vs_dest_set_elem *e;
- list_for_each_entry(e, &set->list, list) {
- if (e->dest == dest)
- /* already existed */
- return NULL;
+ if (check) {
+ list_for_each_entry(e, &set->list, list) {
+ struct ip_vs_dest *d;
+
+ d = rcu_dereference_protected(e->dest, 1);
+ if (d == dest)
+ /* already existed */
+ return;
+ }
}
e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL)
- return NULL;
+ return;
- atomic_inc(&dest->refcnt);
- e->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(e->dest, dest);
- list_add(&e->list, &set->list);
+ list_add_rcu(&e->list, &set->list);
atomic_inc(&set->size);
set->lastmod = jiffies;
- return e;
}
static void
@@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
struct ip_vs_dest_set_elem *e;
list_for_each_entry(e, &set->list, list) {
- if (e->dest == dest) {
+ struct ip_vs_dest *d;
+
+ d = rcu_dereference_protected(e->dest, 1);
+ if (d == dest) {
/* HIT */
atomic_dec(&set->size);
set->lastmod = jiffies;
- atomic_dec(&e->dest->refcnt);
- list_del(&e->list);
- kfree(e);
+ ip_vs_dest_put(dest);
+ list_del_rcu(&e->list);
+ kfree_rcu(e, rcu_head);
break;
}
}
@@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
struct ip_vs_dest_set_elem *e, *ep;
- write_lock(&set->lock);
list_for_each_entry_safe(e, ep, &set->list, list) {
+ struct ip_vs_dest *d;
+
+ d = rcu_dereference_protected(e->dest, 1);
/*
* We don't kfree dest because it is referred either
* by its service or by the trash dest list.
*/
- atomic_dec(&e->dest->refcnt);
- list_del(&e->list);
- kfree(e);
+ ip_vs_dest_put(d);
+ list_del_rcu(&e->list);
+ kfree_rcu(e, rcu_head);
}
- write_unlock(&set->lock);
}
/* get weighted least-connection node in the destination set */
@@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
return NULL;
/* select the first destination server, whose weight > 0 */
- list_for_each_entry(e, &set->list, list) {
- least = e->dest;
+ list_for_each_entry_rcu(e, &set->list, list) {
+ least = rcu_dereference(e->dest);
if (least->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* find the destination with the weighted least load */
nextstage:
- list_for_each_entry(e, &set->list, list) {
- dest = e->dest;
+ list_for_each_entry_continue_rcu(e, &set->list, list) {
+ dest = rcu_dereference(e->dest);
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* select the first destination server, whose weight > 0 */
list_for_each_entry(e, &set->list, list) {
- most = e->dest;
+ most = rcu_dereference_protected(e->dest, 1);
if (atomic_read(&most->weight) > 0) {
moh = ip_vs_dest_conn_overhead(most);
goto nextstage;
@@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* find the destination with the weighted most load */
nextstage:
- list_for_each_entry(e, &set->list, list) {
- dest = e->dest;
+ list_for_each_entry_continue(e, &set->list, list) {
+ dest = rcu_dereference_protected(e->dest, 1);
doh = ip_vs_dest_conn_overhead(dest);
/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
if ((moh * atomic_read(&dest->weight) <
@@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
* IP address and its destination server set
*/
struct ip_vs_lblcr_entry {
- struct list_head list;
+ struct hlist_node list;
int af; /* address family */
union nf_inet_addr addr; /* destination IP address */
struct ip_vs_dest_set set; /* destination server set */
unsigned long lastuse; /* last used time */
+ struct rcu_head rcu_head;
};
@@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry {
* IPVS lblcr hash table
*/
struct ip_vs_lblcr_table {
- struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
+ struct rcu_head rcu_head;
+ struct hlist_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
struct timer_list periodic_timer; /* collect stale entries */
int rover; /* rover for expire check */
int counter; /* counter for no expire */
+ bool dead;
};
@@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = {
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
- list_del(&en->list);
+ hlist_del_rcu(&en->list);
ip_vs_dest_set_eraseall(&en->set);
- kfree(en);
+ kfree_rcu(en, rcu_head);
}
@@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
- list_add(&en->list, &tbl->bucket[hash]);
+ hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
}
-/*
- * Get ip_vs_lblcr_entry associated with supplied parameters. Called under
- * read lock.
- */
+/* Get ip_vs_lblcr_entry associated with supplied parameters. */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
const union nf_inet_addr *addr)
@@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
struct ip_vs_lblcr_entry *en;
- list_for_each_entry(en, &tbl->bucket[hash], list)
+ hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
if (ip_vs_addr_equal(af, &en->addr, addr))
return en;
@@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
/*
* Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
- * IP address to a server. Called under write lock.
+ * IP address to a server. Called under spin lock.
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
@@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
/* initialize its dest set */
atomic_set(&(en->set.size), 0);
INIT_LIST_HEAD(&en->set.list);
- rwlock_init(&en->set.lock);
+
+ ip_vs_dest_set_insert(&en->set, dest, false);
ip_vs_lblcr_hash(tbl, en);
+ return en;
}
- write_lock(&en->set.lock);
- ip_vs_dest_set_insert(&en->set, dest);
- write_unlock(&en->set.lock);
+ ip_vs_dest_set_insert(&en->set, dest, true);
return en;
}
@@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
/*
* Flush all the entries of the specified table.
*/
-static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
+static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
int i;
- struct ip_vs_lblcr_entry *en, *nxt;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
- /* No locking required, only called during cleanup. */
+ spin_lock_bh(&svc->sched_lock);
+ tbl->dead = 1;
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
}
}
+ spin_unlock_bh(&svc->sched_lock);
}
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
@@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int i, j;
- struct ip_vs_lblcr_entry *en, *nxt;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_after(en->lastuse +
sysctl_lblcr_expiration(svc), now))
continue;
@@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
ip_vs_lblcr_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
@@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
unsigned long now = jiffies;
int goal;
int i, j;
- struct ip_vs_lblcr_entry *en, *nxt;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
@@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
continue;
@@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
atomic_dec(&tbl->entries);
goal--;
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
@@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
* Initialize the hash buckets
*/
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- INIT_LIST_HEAD(&tbl->bucket[i]);
+ INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
+ tbl->dead = 0;
/*
* Hook periodic timer for garbage collection
@@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
}
-static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
{
struct ip_vs_lblcr_table *tbl = svc->sched_data;
@@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
del_timer_sync(&tbl->periodic_timer);
/* got to clean up table entries here */
- ip_vs_lblcr_flush(tbl);
+ ip_vs_lblcr_flush(svc);
/* release the table itself */
- kfree(tbl);
+ kfree_rcu(tbl, rcu_head);
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
sizeof(*tbl));
-
- return 0;
}
@@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
* The server with weight=0 is quiesced and will not receive any
* new connection.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
struct ip_vs_dest *d;
- list_for_each_entry(d, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(d, &svc->destinations, n_list) {
if (atomic_read(&d->activeconns)*2
< atomic_read(&d->weight)) {
return 1;
@@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_lblcr_table *tbl = svc->sched_data;
struct ip_vs_iphdr iph;
- struct ip_vs_dest *dest = NULL;
+ struct ip_vs_dest *dest;
struct ip_vs_lblcr_entry *en;
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
@@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
/* First look in our cache */
- read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
- /* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
/* Get the least loaded destination */
- read_lock(&en->set.lock);
dest = ip_vs_dest_set_min(&en->set);
- read_unlock(&en->set.lock);
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
- time_after(jiffies, en->set.lastmod +
+ time_after(jiffies, en->set.lastmod +
sysctl_lblcr_expiration(svc))) {
- struct ip_vs_dest *m;
+ spin_lock_bh(&svc->sched_lock);
+ if (atomic_read(&en->set.size) > 1) {
+ struct ip_vs_dest *m;
- write_lock(&en->set.lock);
- m = ip_vs_dest_set_max(&en->set);
- if (m)
- ip_vs_dest_set_erase(&en->set, m);
- write_unlock(&en->set.lock);
+ m = ip_vs_dest_set_max(&en->set);
+ if (m)
+ ip_vs_dest_set_erase(&en->set, m);
+ }
+ spin_unlock_bh(&svc->sched_lock);
}
/* If the destination is not overloaded, use it */
- if (dest && !is_overloaded(dest, svc)) {
- read_unlock(&svc->sched_lock);
+ if (dest && !is_overloaded(dest, svc))
goto out;
- }
/* The cache entry is invalid, time to schedule */
dest = __ip_vs_lblcr_schedule(svc);
if (!dest) {
ip_vs_scheduler_err(svc, "no destination available");
- read_unlock(&svc->sched_lock);
return NULL;
}
/* Update our cache entry */
- write_lock(&en->set.lock);
- ip_vs_dest_set_insert(&en->set, dest);
- write_unlock(&en->set.lock);
- }
- read_unlock(&svc->sched_lock);
-
- if (dest)
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_dest_set_insert(&en->set, dest, true);
+ spin_unlock_bh(&svc->sched_lock);
goto out;
+ }
/* No cache entry, time to schedule */
dest = __ip_vs_lblcr_schedule(svc);
@@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
/* If we fail to create a cache entry, we'll just use the valid dest */
- write_lock(&svc->sched_lock);
- ip_vs_lblcr_new(tbl, &iph.daddr, dest);
- write_unlock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_lblcr_new(tbl, &iph.daddr, dest);
+ spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
@@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index f391819c0cca..5128e338a749 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* served, but no new connection is assigned to the server.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
atomic_read(&dest->weight) == 0)
continue;
@@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
static void __exit ip_vs_lc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_lc_init);
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 984d9c137d84..646cfd4baa73 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* new connections.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
!atomic_read(&dest->weight))
@@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
static void __exit ip_vs_nq_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_nq_init);
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 5cf859ccb31b..1a82b29ce8ea 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -13,20 +13,8 @@
/* IPVS pe list */
static LIST_HEAD(ip_vs_pe);
-/* lock for service table */
-static DEFINE_SPINLOCK(ip_vs_pe_lock);
-
-/* Bind a service with a pe */
-void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
-{
- svc->pe = pe;
-}
-
-/* Unbind a service from its pe */
-void ip_vs_unbind_pe(struct ip_vs_service *svc)
-{
- svc->pe = NULL;
-}
+/* semaphore for IPVS PEs. */
+static DEFINE_MUTEX(ip_vs_pe_mutex);
/* Get pe in the pe list by name */
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
@@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
pe_name);
- spin_lock_bh(&ip_vs_pe_lock);
-
- list_for_each_entry(pe, &ip_vs_pe, n_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
/* Test and get the modules atomically */
if (pe->module &&
!try_module_get(pe->module)) {
@@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
}
if (strcmp(pe_name, pe->name)==0) {
/* HIT */
- spin_unlock_bh(&ip_vs_pe_lock);
+ rcu_read_unlock();
return pe;
}
if (pe->module)
module_put(pe->module);
}
+ rcu_read_unlock();
- spin_unlock_bh(&ip_vs_pe_lock);
return NULL;
}
@@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
/* increase the module use count */
ip_vs_use_count_inc();
- spin_lock_bh(&ip_vs_pe_lock);
-
- if (!list_empty(&pe->n_list)) {
- spin_unlock_bh(&ip_vs_pe_lock);
- ip_vs_use_count_dec();
- pr_err("%s(): [%s] pe already linked\n",
- __func__, pe->name);
- return -EINVAL;
- }
-
+ mutex_lock(&ip_vs_pe_mutex);
/* Make sure that the pe with this name doesn't exist
* in the pe list.
*/
list_for_each_entry(tmp, &ip_vs_pe, n_list) {
if (strcmp(tmp->name, pe->name) == 0) {
- spin_unlock_bh(&ip_vs_pe_lock);
+ mutex_unlock(&ip_vs_pe_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] pe already existed "
"in the system\n", __func__, pe->name);
@@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
}
}
/* Add it into the d-linked pe list */
- list_add(&pe->n_list, &ip_vs_pe);
- spin_unlock_bh(&ip_vs_pe_lock);
+ list_add_rcu(&pe->n_list, &ip_vs_pe);
+ mutex_unlock(&ip_vs_pe_mutex);
pr_info("[%s] pe registered.\n", pe->name);
@@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe);
/* Unregister a pe from the pe list */
int unregister_ip_vs_pe(struct ip_vs_pe *pe)
{
- spin_lock_bh(&ip_vs_pe_lock);
- if (list_empty(&pe->n_list)) {
- spin_unlock_bh(&ip_vs_pe_lock);
- pr_err("%s(): [%s] pe is not in the list. failed\n",
- __func__, pe->name);
- return -EINVAL;
- }
-
+ mutex_lock(&ip_vs_pe_mutex);
/* Remove it from the d-linked pe list */
- list_del(&pe->n_list);
- spin_unlock_bh(&ip_vs_pe_lock);
+ list_del_rcu(&pe->n_list);
+ mutex_unlock(&ip_vs_pe_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 12475ef88daf..9ef22bdce9f1 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -13,7 +13,8 @@ static const char *ip_vs_dbg_callid(char *buf, size_t buf_len,
const char *callid, size_t callid_len,
int *idx)
{
- size_t len = min(min(callid_len, (size_t)64), buf_len - *idx - 1);
+ size_t max_len = 64;
+ size_t len = min3(max_len, callid_len, buf_len - *idx - 1);
memcpy(buf + *idx, callid, len);
buf[*idx+len] = '\0';
*idx += len + 1;
@@ -37,14 +38,10 @@ static int get_callid(const char *dptr, unsigned int dataoff,
if (ret > 0)
break;
if (!ret)
- return 0;
+ return -EINVAL;
dataoff += *matchoff;
}
- /* Empty callid is useless */
- if (!*matchlen)
- return -EINVAL;
-
/* Too large is useless */
if (*matchlen > IP_VS_PEDATA_MAXLEN)
return -EINVAL;
@@ -172,6 +169,7 @@ static int __init ip_vs_sip_init(void)
static void __exit ip_vs_sip_cleanup(void)
{
unregister_ip_vs_pe(&ip_vs_sip_pe);
+ synchronize_rcu();
}
module_init(ip_vs_sip_init);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index cd1d7298f7ba..86464881cd20 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (sch == NULL)
return 0;
net = skb_net(skb);
+ rcu_read_lock();
if ((sch->type == SCTP_CID_INIT) &&
- (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
- &iph->daddr, sh->dest))) {
+ (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
+ &iph->daddr, sh->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
@@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- ip_vs_service_put(svc);
+ rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
- else {
- ip_vs_service_put(svc);
+ else
*verdict = NF_DROP;
- }
+ rcu_read_unlock();
return 0;
}
- ip_vs_service_put(svc);
}
+ rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -208,7 +208,7 @@ enum ipvs_sctp_event_t {
IP_VS_SCTP_EVE_LAST
};
-static enum ipvs_sctp_event_t sctp_events[255] = {
+static enum ipvs_sctp_event_t sctp_events[256] = {
IP_VS_SCTP_EVE_DATA_CLI,
IP_VS_SCTP_EVE_INIT_CLI,
IP_VS_SCTP_EVE_INIT_ACK_CLI,
@@ -994,9 +994,9 @@ static void
sctp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
set_sctp_state(pd, cp, direction, skb);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline __u16 sctp_app_hashkey(__be16 port)
@@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
hash = sctp_app_hashkey(port);
- spin_lock_bh(&ipvs->sctp_app_lock);
list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+ list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&ipvs->sctp_app_lock);
return ret;
}
static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
- spin_lock_bh(&ipvs->sctp_app_lock);
atomic_dec(&pd->appcnt);
- list_del(&inc->p_list);
- spin_unlock_bh(&ipvs->sctp_app_lock);
+ list_del_rcu(&inc->p_list);
}
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
@@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- spin_lock(&ipvs->sctp_app_lock);
- list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&ipvs->sctp_app_lock);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&ipvs->sctp_app_lock);
+ rcu_read_unlock();
out:
return result;
}
@@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
- spin_lock_init(&ipvs->sctp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts));
if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 9af653a75825..50a15944c6c1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
}
net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
+ rcu_read_lock();
if (th->syn &&
- (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
- &iph->daddr, th->dest))) {
+ (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
+ &iph->daddr, th->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
@@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- ip_vs_service_put(svc);
+ rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
- else {
- ip_vs_service_put(svc);
+ else
*verdict = NF_DROP;
- }
+ rcu_read_unlock();
return 0;
}
- ip_vs_service_put(svc);
}
+ rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
if (th == NULL)
return;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
set_tcp_state(pd, cp, direction, th);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline __u16 tcp_app_hashkey(__be16 port)
@@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
hash = tcp_app_hashkey(port);
- spin_lock_bh(&ipvs->tcp_app_lock);
list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+ list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&ipvs->tcp_app_lock);
return ret;
}
@@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
static void
tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
- spin_lock_bh(&ipvs->tcp_app_lock);
atomic_dec(&pd->appcnt);
- list_del(&inc->p_list);
- spin_unlock_bh(&ipvs->tcp_app_lock);
+ list_del_rcu(&inc->p_list);
}
@@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- spin_lock(&ipvs->tcp_app_lock);
- list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&ipvs->tcp_app_lock);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&ipvs->tcp_app_lock);
+ rcu_read_unlock();
out:
return result;
@@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
: tcp_timeouts[IP_VS_TCP_S_LISTEN]);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
/* ---------------------------------------------
@@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
- spin_lock_init(&ipvs->tcp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 503a842c90d2..b62a3c0ff9bf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
return 0;
}
net = skb_net(skb);
- svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
- &iph->daddr, uh->dest);
+ rcu_read_lock();
+ svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
+ &iph->daddr, uh->dest);
if (svc) {
int ignored;
@@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- ip_vs_service_put(svc);
+ rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
- else {
- ip_vs_service_put(svc);
+ else
*verdict = NF_DROP;
- }
+ rcu_read_unlock();
return 0;
}
- ip_vs_service_put(svc);
}
+ rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
hash = udp_app_hashkey(port);
-
- spin_lock_bh(&ipvs->udp_app_lock);
list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+ list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&ipvs->udp_app_lock);
return ret;
}
@@ -380,12 +377,9 @@ static void
udp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
- struct netns_ipvs *ipvs = net_ipvs(net);
- spin_lock_bh(&ipvs->udp_app_lock);
atomic_dec(&pd->appcnt);
- list_del(&inc->p_list);
- spin_unlock_bh(&ipvs->udp_app_lock);
+ list_del_rcu(&inc->p_list);
}
@@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- spin_lock(&ipvs->udp_app_lock);
- list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&ipvs->udp_app_lock);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&ipvs->udp_app_lock);
+ rcu_read_unlock();
out:
return result;
@@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
- spin_lock_init(&ipvs->udp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
sizeof(udp_timeouts));
if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index c49b388d1085..c35986c793d9 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
}
-static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
+static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
{
- svc->sched_data = &svc->destinations;
+ struct list_head *p;
+
+ spin_lock_bh(&svc->sched_lock);
+ p = (struct list_head *) svc->sched_data;
+ /* dest is already unlinked, so p->prev is not valid but
+ * p->next is valid, use it to reach previous entry.
+ */
+ if (p == &dest->n_list)
+ svc->sched_data = p->next->prev;
+ spin_unlock_bh(&svc->sched_lock);
return 0;
}
@@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct list_head *p, *q;
- struct ip_vs_dest *dest;
+ struct list_head *p;
+ struct ip_vs_dest *dest, *last;
+ int pass = 0;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- write_lock(&svc->sched_lock);
- p = (struct list_head *)svc->sched_data;
- p = p->next;
- q = p;
+ spin_lock_bh(&svc->sched_lock);
+ p = (struct list_head *) svc->sched_data;
+ last = dest = list_entry(p, struct ip_vs_dest, n_list);
+
do {
- /* skip list head */
- if (q == &svc->destinations) {
- q = q->next;
- continue;
+ list_for_each_entry_continue_rcu(dest,
+ &svc->destinations,
+ n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > 0)
+ /* HIT */
+ goto out;
+ if (dest == last)
+ goto stop;
}
-
- dest = list_entry(q, struct ip_vs_dest, n_list);
- if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
- atomic_read(&dest->weight) > 0)
- /* HIT */
- goto out;
- q = q->next;
- } while (q != p);
- write_unlock(&svc->sched_lock);
+ pass++;
+ /* Previous dest could be unlinked, do not loop forever.
+ * If we stay at head there is no need for 2nd pass.
+ */
+ } while (pass < 2 && p != &svc->destinations);
+
+stop:
+ spin_unlock_bh(&svc->sched_lock);
ip_vs_scheduler_err(svc, "no destination available");
return NULL;
out:
- svc->sched_data = q;
- write_unlock(&svc->sched_lock);
+ svc->sched_data = &dest->n_list;
+ spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
@@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
.init_service = ip_vs_rr_init_svc,
- .update_service = ip_vs_rr_update_svc,
+ .add_dest = NULL,
+ .del_dest = ip_vs_rr_del_dest,
.schedule = ip_vs_rr_schedule,
};
@@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void)
static void __exit ip_vs_rr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_rr_init);
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index d6bf20d6cdbe..4dbcda6258bc 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err);
*/
static LIST_HEAD(ip_vs_schedulers);
-/* lock for service table */
-static DEFINE_SPINLOCK(ip_vs_sched_lock);
+/* semaphore for schedulers */
+static DEFINE_MUTEX(ip_vs_sched_mutex);
/*
@@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
{
int ret;
- svc->scheduler = scheduler;
-
if (scheduler->init_service) {
ret = scheduler->init_service(svc);
if (ret) {
@@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
return ret;
}
}
-
+ rcu_assign_pointer(svc->scheduler, scheduler);
return 0;
}
@@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
/*
* Unbind a service with its scheduler
*/
-int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
+void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
+ struct ip_vs_scheduler *sched)
{
- struct ip_vs_scheduler *sched = svc->scheduler;
+ struct ip_vs_scheduler *cur_sched;
- if (!sched)
- return 0;
+ cur_sched = rcu_dereference_protected(svc->scheduler, 1);
+ /* This check proves that old 'sched' was installed */
+ if (!cur_sched)
+ return;
- if (sched->done_service) {
- if (sched->done_service(svc) != 0) {
- pr_err("%s(): done error\n", __func__);
- return -EINVAL;
- }
- }
-
- svc->scheduler = NULL;
- return 0;
+ if (sched->done_service)
+ sched->done_service(svc);
+ /* svc->scheduler can not be set to NULL */
}
@@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
- spin_lock_bh(&ip_vs_sched_lock);
+ mutex_lock(&ip_vs_sched_mutex);
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
/*
@@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
}
if (strcmp(sched_name, sched->name)==0) {
/* HIT */
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
return sched;
}
if (sched->module)
module_put(sched->module);
}
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
return NULL;
}
@@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
+ struct ip_vs_scheduler *sched;
+
+ sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
- svc->scheduler->name, svc->fwmark,
- svc->fwmark, msg);
+ sched->name, svc->fwmark, svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
- svc->scheduler->name,
- ip_vs_proto_name(svc->protocol),
+ sched->name, ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
#endif
} else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
- svc->scheduler->name,
- ip_vs_proto_name(svc->protocol),
+ sched->name, ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg);
}
}
@@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
/* increase the module use count */
ip_vs_use_count_inc();
- spin_lock_bh(&ip_vs_sched_lock);
+ mutex_lock(&ip_vs_sched_mutex);
if (!list_empty(&scheduler->n_list)) {
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] scheduler already linked\n",
__func__, scheduler->name);
@@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
*/
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
if (strcmp(scheduler->name, sched->name) == 0) {
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] scheduler already existed "
"in the system\n", __func__, scheduler->name);
@@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
* Add it into the d-linked scheduler list
*/
list_add(&scheduler->n_list, &ip_vs_schedulers);
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
pr_info("[%s] scheduler registered.\n", scheduler->name);
@@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
return -EINVAL;
}
- spin_lock_bh(&ip_vs_sched_lock);
+ mutex_lock(&ip_vs_sched_mutex);
if (list_empty(&scheduler->n_list)) {
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
pr_err("%s(): [%s] scheduler is not in the list. failed\n",
__func__, scheduler->name);
return -EINVAL;
@@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
* Remove it from the d-linked scheduler list
*/
list_del(&scheduler->n_list);
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 89ead246ed3d..f3205925359a 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* new connections.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
atomic_read(&dest->weight) > 0) {
least = dest;
@@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_sed_dest_overhead(dest);
@@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)
static void __exit ip_vs_sed_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_sed_init);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e33126994628..a65edfe4b16c 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -53,7 +53,7 @@
* IPVS SH bucket
*/
struct ip_vs_sh_bucket {
- struct ip_vs_dest *dest; /* real server (cache) */
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
};
/*
@@ -66,6 +66,10 @@ struct ip_vs_sh_bucket {
#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
+struct ip_vs_sh_state {
+ struct rcu_head rcu_head;
+ struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
+};
/*
* Returns hash value for IPVS SH entry
@@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
* Get ip_vs_dest associated with supplied parameters.
*/
static inline struct ip_vs_dest *
-ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
- const union nf_inet_addr *addr)
+ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
{
- return (tbl[ip_vs_sh_hashkey(af, addr)]).dest;
+ return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
}
@@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
* Assign all the hash buckets of the specified table with the service.
*/
static int
-ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
+ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
{
int i;
struct ip_vs_sh_bucket *b;
struct list_head *p;
struct ip_vs_dest *dest;
int d_count;
+ bool empty;
- b = tbl;
+ b = &s->buckets[0];
p = &svc->destinations;
+ empty = list_empty(p);
d_count = 0;
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
- if (list_empty(p)) {
- b->dest = NULL;
- } else {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest)
+ ip_vs_dest_put(dest);
+ if (empty)
+ RCU_INIT_POINTER(b->dest, NULL);
+ else {
if (p == &svc->destinations)
p = p->next;
dest = list_entry(p, struct ip_vs_dest, n_list);
- atomic_inc(&dest->refcnt);
- b->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(b->dest, dest);
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
@@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
/*
* Flush all the hash buckets of the specified table.
*/
-static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
+static void ip_vs_sh_flush(struct ip_vs_sh_state *s)
{
int i;
struct ip_vs_sh_bucket *b;
+ struct ip_vs_dest *dest;
- b = tbl;
+ b = &s->buckets[0];
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
- if (b->dest) {
- atomic_dec(&b->dest->refcnt);
- b->dest = NULL;
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest) {
+ ip_vs_dest_put(dest);
+ RCU_INIT_POINTER(b->dest, NULL);
}
b++;
}
@@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
{
- struct ip_vs_sh_bucket *tbl;
+ struct ip_vs_sh_state *s;
/* allocate the SH table for this service */
- tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
- GFP_KERNEL);
- if (tbl == NULL)
+ s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
+ if (s == NULL)
return -ENOMEM;
- svc->sched_data = tbl;
+ svc->sched_data = s;
IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
"current service\n",
sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
- /* assign the hash buckets with the updated service */
- ip_vs_sh_assign(tbl, svc);
+ /* assign the hash buckets with current dests */
+ ip_vs_sh_reassign(s, svc);
return 0;
}
-static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
+static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
{
- struct ip_vs_sh_bucket *tbl = svc->sched_data;
+ struct ip_vs_sh_state *s = svc->sched_data;
/* got to clean up hash buckets here */
- ip_vs_sh_flush(tbl);
+ ip_vs_sh_flush(s);
/* release the table itself */
- kfree(svc->sched_data);
+ kfree_rcu(s, rcu_head);
IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
-
- return 0;
}
-static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
+static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
{
- struct ip_vs_sh_bucket *tbl = svc->sched_data;
-
- /* got to clean up hash buckets here */
- ip_vs_sh_flush(tbl);
+ struct ip_vs_sh_state *s = svc->sched_data;
/* assign the hash buckets with the updated service */
- ip_vs_sh_assign(tbl, svc);
+ ip_vs_sh_reassign(s, svc);
return 0;
}
@@ -225,15 +230,15 @@ static struct ip_vs_dest *
ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
- struct ip_vs_sh_bucket *tbl;
+ struct ip_vs_sh_state *s;
struct ip_vs_iphdr iph;
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
- tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
- dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr);
+ s = (struct ip_vs_sh_state *) svc->sched_data;
+ dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
if (!dest
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
@@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
.n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
.init_service = ip_vs_sh_init_svc,
.done_service = ip_vs_sh_done_svc,
- .update_service = ip_vs_sh_update_svc,
+ .add_dest = ip_vs_sh_dest_changed,
+ .del_dest = ip_vs_sh_dest_changed,
+ .upd_dest = ip_vs_sh_dest_changed,
.schedule = ip_vs_sh_schedule,
};
@@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void)
static void __exit ip_vs_sh_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 44fd10c539ac..f6046d9af8d3 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -246,7 +246,7 @@ struct ip_vs_sync_thread_data {
struct ip_vs_sync_mesg_v0 {
__u8 nr_conns;
__u8 syncid;
- __u16 size;
+ __be16 size;
/* ip_vs_sync_conn entries start here */
};
@@ -255,7 +255,7 @@ struct ip_vs_sync_mesg_v0 {
struct ip_vs_sync_mesg {
__u8 reserved; /* must be zero */
__u8 syncid;
- __u16 size;
+ __be16 size;
__u8 nr_conns;
__s8 version; /* SYNC_PROTO_VER */
__u16 spare;
@@ -335,7 +335,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
sb->mesg->version = SYNC_PROTO_VER;
sb->mesg->syncid = ipvs->master_syncid;
- sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
+ sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
sb->mesg->nr_conns = 0;
sb->mesg->spare = 0;
sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
@@ -418,7 +418,7 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
mesg->nr_conns = 0;
mesg->syncid = ipvs->master_syncid;
- mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
+ mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
sb->firstuse = jiffies;
@@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
return;
- spin_lock(&ipvs->sync_buff_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
@@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
if (!buff) {
buff = ip_vs_sync_buff_create_v0(ipvs);
if (!buff) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -582,7 +582,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
}
m->nr_conns++;
- m->size += len;
+ m->size = htons(ntohs(m->size) + len);
buff->head += len;
/* check if there is a space for next one */
@@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
}
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
cp = cp->control;
@@ -641,9 +641,9 @@ sloop:
pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
}
- spin_lock(&ipvs->sync_buff_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
@@ -683,7 +683,7 @@ sloop:
if (!buff) {
buff = ip_vs_sync_buff_create(ipvs);
if (!buff) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -693,7 +693,7 @@ sloop:
p = buff->head;
buff->head += pad + len;
- m->size += pad + len;
+ m->size = htons(ntohs(m->size) + pad + len);
/* Add ev. padding from prev. sync_conn */
while (pad--)
*(p++) = 0;
@@ -750,7 +750,7 @@ sloop:
}
}
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
control:
/* synchronize its controller if it has */
@@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
kfree(param->pe_data);
dest = cp->dest;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
!(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
if (flags & IP_VS_CONN_F_INACTIVE) {
@@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
cp->flags = flags;
- spin_unlock(&cp->lock);
- if (!dest) {
- dest = ip_vs_try_bind_dest(cp);
- if (dest)
- atomic_dec(&dest->refcnt);
- }
+ spin_unlock_bh(&cp->lock);
+ if (!dest)
+ ip_vs_try_bind_dest(cp);
} else {
/*
* Find the appropriate destination for the connection.
* If it is not found the connection will remain unbound
* but still handled.
*/
+ rcu_read_lock();
dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
param->vport, protocol, fwmark, flags);
cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
- if (dest)
- atomic_dec(&dest->refcnt);
+ rcu_read_unlock();
if (!cp) {
if (param->pe_data)
kfree(param->pe_data);
@@ -1178,10 +1175,8 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
IP_VS_DBG(2, "BACKUP, message header too short\n");
return;
}
- /* Convert size back to host byte order */
- m2->size = ntohs(m2->size);
- if (buflen != m2->size) {
+ if (buflen != ntohs(m2->size)) {
IP_VS_DBG(2, "BACKUP, bogus message size\n");
return;
}
@@ -1547,10 +1542,7 @@ ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
int msize;
int ret;
- msize = msg->size;
-
- /* Put size in network byte order */
- msg->size = htons(msg->size);
+ msize = ntohs(msg->size);
ret = ip_vs_send_async(sock, (char *)msg, msize);
if (ret >= 0 || ret == -EAGAIN)
@@ -1692,11 +1684,7 @@ static int sync_thread_backup(void *data)
break;
}
- /* disable bottom half, because it accesses the data
- shared by softirq while getting/creating conns */
- local_bh_disable();
ip_vs_process_message(tinfo->net, tinfo->buf, len);
- local_bh_enable();
}
}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bc1bfc48a17f..c60a81c4ce9a 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* new connections.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
atomic_read(&dest->weight) > 0) {
least = dest;
@@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
@@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)
static void __exit ip_vs_wlc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_wlc_init);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 231be7dd547a..0e68555bceb9 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -29,14 +29,45 @@
#include <net/ip_vs.h>
+/* The WRR algorithm depends on some calculations:
+ * - mw: maximum weight
+ * - di: weight step, greatest common divisor from all weights
+ * - cw: current required weight
+ * As result, all weights are in the [di..mw] range with a step=di.
+ *
+ * First, we start with cw = mw and select dests with weight >= cw.
+ * Then cw is reduced with di and all dests are checked again.
+ * Last pass should be with cw = di. We have mw/di passes in total:
+ *
+ * pass 1: cw = max weight
+ * pass 2: cw = max weight - di
+ * pass 3: cw = max weight - 2 * di
+ * ...
+ * last pass: cw = di
+ *
+ * Weights are supposed to be >= di but we run in parallel with
+ * weight changes, so some dest weight may be reduced below di,
+ * which is bad if it is the only available dest.
+ *
+ * So, we modify how mw is calculated, now it is reduced with (di - 1),
+ * so that last cw is 1 to catch such dests with weight below di:
+ * pass 1: cw = max weight - (di - 1)
+ * pass 2: cw = max weight - di - (di - 1)
+ * pass 3: cw = max weight - 2 * di - (di - 1)
+ * ...
+ * last pass: cw = 1
+ *
+ */
+
/*
* current destination pointer for weighted round-robin scheduling
*/
struct ip_vs_wrr_mark {
- struct list_head *cl; /* current list head */
+ struct ip_vs_dest *cl; /* current dest or head */
int cw; /* current weight */
int mw; /* maximum weight */
int di; /* decreasing interval */
+ struct rcu_head rcu_head;
};
@@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
if (mark == NULL)
return -ENOMEM;
- mark->cl = &svc->destinations;
- mark->cw = 0;
- mark->mw = ip_vs_wrr_max_weight(svc);
+ mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
mark->di = ip_vs_wrr_gcd_weight(svc);
+ mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+ mark->cw = mark->mw;
svc->sched_data = mark;
return 0;
}
-static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+static void ip_vs_wrr_done_svc(struct ip_vs_service *svc)
{
+ struct ip_vs_wrr_mark *mark = svc->sched_data;
+
/*
* Release the mark variable
*/
- kfree(svc->sched_data);
-
- return 0;
+ kfree_rcu(mark, rcu_head);
}
-static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
{
struct ip_vs_wrr_mark *mark = svc->sched_data;
- mark->cl = &svc->destinations;
- mark->mw = ip_vs_wrr_max_weight(svc);
+ spin_lock_bh(&svc->sched_lock);
+ mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
mark->di = ip_vs_wrr_gcd_weight(svc);
- if (mark->cw > mark->mw)
- mark->cw = 0;
+ mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+ if (mark->cw > mark->mw || !mark->cw)
+ mark->cw = mark->mw;
+ else if (mark->di > 1)
+ mark->cw = (mark->cw / mark->di) * mark->di + 1;
+ spin_unlock_bh(&svc->sched_lock);
return 0;
}
@@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct ip_vs_dest *dest;
+ struct ip_vs_dest *dest, *last, *stop = NULL;
struct ip_vs_wrr_mark *mark = svc->sched_data;
- struct list_head *p;
+ bool last_pass = false, restarted = false;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- /*
- * This loop will always terminate, because mark->cw in (0, max_weight]
- * and at least one server has its weight equal to max_weight.
- */
- write_lock(&svc->sched_lock);
- p = mark->cl;
+ spin_lock_bh(&svc->sched_lock);
+ dest = mark->cl;
+ /* No available dests? */
+ if (mark->mw == 0)
+ goto err_noavail;
+ last = dest;
+ /* Stop only after all dests were checked for weight >= 1 (last pass) */
while (1) {
- if (mark->cl == &svc->destinations) {
- /* it is at the head of the destination list */
-
- if (mark->cl == mark->cl->next) {
- /* no dest entry */
- ip_vs_scheduler_err(svc,
- "no destination available: "
- "no destinations present");
- dest = NULL;
- goto out;
- }
-
- mark->cl = svc->destinations.next;
- mark->cw -= mark->di;
- if (mark->cw <= 0) {
- mark->cw = mark->mw;
- /*
- * Still zero, which means no available servers.
- */
- if (mark->cw == 0) {
- mark->cl = &svc->destinations;
- ip_vs_scheduler_err(svc,
- "no destination available");
- dest = NULL;
- goto out;
- }
- }
- } else
- mark->cl = mark->cl->next;
-
- if (mark->cl != &svc->destinations) {
- /* not at the head of the list */
- dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+ list_for_each_entry_continue_rcu(dest,
+ &svc->destinations,
+ n_list) {
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
- atomic_read(&dest->weight) >= mark->cw) {
- /* got it */
- break;
- }
+ atomic_read(&dest->weight) >= mark->cw)
+ goto found;
+ if (dest == stop)
+ goto err_over;
}
-
- if (mark->cl == p && mark->cw == mark->di) {
- /* back to the start, and no dest is found.
- It is only possible when all dests are OVERLOADED */
- dest = NULL;
- ip_vs_scheduler_err(svc,
- "no destination available: "
- "all destinations are overloaded");
- goto out;
+ mark->cw -= mark->di;
+ if (mark->cw <= 0) {
+ mark->cw = mark->mw;
+ /* Stop if we tried last pass from first dest:
+ * 1. last_pass: we started checks when cw > di but
+ * then all dests were checked for w >= 1
+ * 2. last was head: the first and only traversal
+ * was for weight >= 1, for all dests.
+ */
+ if (last_pass ||
+ &last->n_list == &svc->destinations)
+ goto err_over;
+ restarted = true;
+ }
+ last_pass = mark->cw <= mark->di;
+ if (last_pass && restarted &&
+ &last->n_list != &svc->destinations) {
+ /* First traversal was for w >= 1 but only
+ * for dests after 'last', now do the same
+ * for all dests up to 'last'.
+ */
+ stop = last;
}
}
+found:
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
+ mark->cl = dest;
out:
- write_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
return dest;
+
+err_noavail:
+ mark->cl = dest;
+ dest = NULL;
+ ip_vs_scheduler_err(svc, "no destination available");
+ goto out;
+
+err_over:
+ mark->cl = dest;
+ dest = NULL;
+ ip_vs_scheduler_err(svc, "no destination available: "
+ "all destinations are overloaded");
+ goto out;
}
@@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
.n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
.init_service = ip_vs_wrr_init_svc,
.done_service = ip_vs_wrr_done_svc,
- .update_service = ip_vs_wrr_update_svc,
+ .add_dest = ip_vs_wrr_dest_changed,
+ .del_dest = ip_vs_wrr_dest_changed,
+ .upd_dest = ip_vs_wrr_dest_changed,
.schedule = ip_vs_wrr_schedule,
};
@@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void)
static void __exit ip_vs_wrr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_wrr_init);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index ee6b7a9f1ec2..b75ff6429a04 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -17,6 +17,8 @@
* - not all connections have destination server, for example,
* connections in backup server when fwmark is used
* - bypass connections use daddr from packet
+ * - we can use dst without ref while sending in RCU section, we use
+ * ref when returning NF_ACCEPT for NAT-ed packet via loopback
* LOCAL_OUT rules:
* - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
* - skb->pkt_type is not set yet
@@ -51,39 +53,54 @@ enum {
*/
IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
+ IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */
};
+static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void)
+{
+ return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC);
+}
+
+static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst)
+{
+ kfree(dest_dst);
+}
+
/*
* Destination cache to speed up outgoing route lookup
*/
static inline void
-__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
- u32 dst_cookie)
+__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst,
+ struct dst_entry *dst, u32 dst_cookie)
{
- struct dst_entry *old_dst;
+ struct ip_vs_dest_dst *old;
+
+ old = rcu_dereference_protected(dest->dest_dst,
+ lockdep_is_held(&dest->dst_lock));
- old_dst = dest->dst_cache;
- dest->dst_cache = dst;
- dest->dst_rtos = rtos;
- dest->dst_cookie = dst_cookie;
- dst_release(old_dst);
+ if (dest_dst) {
+ dest_dst->dst_cache = dst;
+ dest_dst->dst_cookie = dst_cookie;
+ }
+ rcu_assign_pointer(dest->dest_dst, dest_dst);
+
+ if (old)
+ call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
}
-static inline struct dst_entry *
-__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
+static inline struct ip_vs_dest_dst *
+__ip_vs_dst_check(struct ip_vs_dest *dest)
{
- struct dst_entry *dst = dest->dst_cache;
+ struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst);
+ struct dst_entry *dst;
- if (!dst)
+ if (!dest_dst)
return NULL;
- if ((dst->obsolete || rtos != dest->dst_rtos) &&
- dst->ops->check(dst, dest->dst_cookie) == NULL) {
- dest->dst_cache = NULL;
- dst_release(dst);
+ dst = dest_dst->dst_cache;
+ if (dst->obsolete &&
+ dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
return NULL;
- }
- dst_hold(dst);
- return dst;
+ return dest_dst;
}
static inline bool
@@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
/* Get route to daddr, update *saddr, optionally bind route to saddr */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
- u32 rtos, int rt_mode, __be32 *saddr)
+ int rt_mode, __be32 *saddr)
{
struct flowi4 fl4;
struct rtable *rt;
@@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
- fl4.flowi4_tos = rtos;
fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
FLOWI_FLAG_KNOWN_NH : 0;
@@ -124,7 +140,7 @@ retry:
if (PTR_ERR(rt) == -EINVAL && *saddr &&
rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
*saddr = 0;
- flowi4_update_output(&fl4, 0, rtos, daddr, 0);
+ flowi4_update_output(&fl4, 0, 0, daddr, 0);
goto retry;
}
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
@@ -132,7 +148,7 @@ retry:
} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
ip_rt_put(rt);
*saddr = fl4.saddr;
- flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
+ flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
loop++;
goto retry;
}
@@ -141,113 +157,140 @@ retry:
}
/* Get route to destination or remote server */
-static struct rtable *
+static int
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
- __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
+ __be32 daddr, int rt_mode, __be32 *ret_saddr)
{
struct net *net = dev_net(skb_dst(skb)->dev);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
struct rtable *ort; /* Original route */
- int local;
+ struct iphdr *iph;
+ __be16 df;
+ int mtu;
+ int local, noref = 1;
if (dest) {
- spin_lock(&dest->dst_lock);
- if (!(rt = (struct rtable *)
- __ip_vs_dst_check(dest, rtos))) {
- rt = do_output_route4(net, dest->addr.ip, rtos,
- rt_mode, &dest->dst_saddr.ip);
+ dest_dst = __ip_vs_dst_check(dest);
+ if (likely(dest_dst))
+ rt = (struct rtable *) dest_dst->dst_cache;
+ else {
+ dest_dst = ip_vs_dest_dst_alloc();
+ spin_lock_bh(&dest->dst_lock);
+ if (!dest_dst) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ goto err_unreach;
+ }
+ rt = do_output_route4(net, dest->addr.ip, rt_mode,
+ &dest_dst->dst_saddr.ip);
if (!rt) {
- spin_unlock(&dest->dst_lock);
- return NULL;
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ ip_vs_dest_dst_free(dest_dst);
+ goto err_unreach;
}
- __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
- IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
- "rtos=%X\n",
- &dest->addr.ip, &dest->dst_saddr.ip,
- atomic_read(&rt->dst.__refcnt), rtos);
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
+ &dest->addr.ip, &dest_dst->dst_saddr.ip,
+ atomic_read(&rt->dst.__refcnt));
}
daddr = dest->addr.ip;
if (ret_saddr)
- *ret_saddr = dest->dst_saddr.ip;
- spin_unlock(&dest->dst_lock);
+ *ret_saddr = dest_dst->dst_saddr.ip;
} else {
__be32 saddr = htonl(INADDR_ANY);
+ noref = 0;
+
/* For such unconfigured boxes avoid many route lookups
* for performance reasons because we do not remember saddr
*/
rt_mode &= ~IP_VS_RT_MODE_CONNECT;
- rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
+ rt = do_output_route4(net, daddr, rt_mode, &saddr);
if (!rt)
- return NULL;
+ goto err_unreach;
if (ret_saddr)
*ret_saddr = saddr;
}
- local = rt->rt_flags & RTCF_LOCAL;
+ local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
rt_mode)) {
IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
(rt->rt_flags & RTCF_LOCAL) ?
"local":"non-local", &daddr);
- ip_rt_put(rt);
- return NULL;
- }
- if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
- !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
- "requires NAT method, dest: %pI4\n",
- &ip_hdr(skb)->daddr, &daddr);
- ip_rt_put(rt);
- return NULL;
+ goto err_put;
}
- if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
- IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
- "to non-local address, dest: %pI4\n",
- &ip_hdr(skb)->saddr, &daddr);
- ip_rt_put(rt);
- return NULL;
+ iph = ip_hdr(skb);
+ if (likely(!local)) {
+ if (unlikely(ipv4_is_loopback(iph->saddr))) {
+ IP_VS_DBG_RL("Stopping traffic from loopback address "
+ "%pI4 to non-local address, dest: %pI4\n",
+ &iph->saddr, &daddr);
+ goto err_put;
+ }
+ } else {
+ ort = skb_rtable(skb);
+ if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
+ !(ort->rt_flags & RTCF_LOCAL)) {
+ IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
+ "local requires NAT method, dest: %pI4\n",
+ &iph->daddr, &daddr);
+ goto err_put;
+ }
+ /* skb to local stack, preserve old route */
+ if (!noref)
+ ip_rt_put(rt);
+ return local;
}
- return rt;
-}
-
-/* Reroute packet to local IPv4 stack after DNAT */
-static int
-__ip_vs_reroute_locally(struct sk_buff *skb)
-{
- struct rtable *rt = skb_rtable(skb);
- struct net_device *dev = rt->dst.dev;
- struct net *net = dev_net(dev);
- struct iphdr *iph = ip_hdr(skb);
-
- if (rt_is_input_route(rt)) {
- unsigned long orefdst = skb->_skb_refdst;
-
- if (ip_route_input(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
- return 0;
- refdst_drop(orefdst);
+ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
+ mtu = dst_mtu(&rt->dst);
+ df = iph->frag_off & htons(IP_DF);
} else {
- struct flowi4 fl4 = {
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .flowi4_tos = RT_TOS(iph->tos),
- .flowi4_mark = skb->mark,
- };
-
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- return 0;
- if (!(rt->rt_flags & RTCF_LOCAL)) {
- ip_rt_put(rt);
- return 0;
+ struct sock *sk = skb->sk;
+
+ mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ if (mtu < 68) {
+ IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
+ goto err_put;
}
- /* Drop old route. */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
+ ort = skb_rtable(skb);
+ if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+ /* MTU check allowed? */
+ df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
}
- return 1;
+
+ /* MTU checking */
+ if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+ goto err_put;
+ }
+
+ skb_dst_drop(skb);
+ if (noref) {
+ if (!local)
+ skb_dst_set_noref_force(skb, &rt->dst);
+ else
+ skb_dst_set(skb, dst_clone(&rt->dst));
+ } else
+ skb_dst_set(skb, &rt->dst);
+
+ return local;
+
+err_put:
+ if (!noref)
+ ip_rt_put(rt);
+ return -1;
+
+err_unreach:
+ dst_link_failure(skb);
+ return -1;
}
#ifdef CONFIG_IP_VS_IPV6
@@ -294,44 +337,57 @@ out_err:
/*
* Get route to destination or remote server
*/
-static struct rt6_info *
+static int
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
- int do_xfrm, int rt_mode)
+ struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
struct net *net = dev_net(skb_dst(skb)->dev);
+ struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
struct rt6_info *ort; /* Original route */
struct dst_entry *dst;
- int local;
+ int mtu;
+ int local, noref = 1;
if (dest) {
- spin_lock(&dest->dst_lock);
- rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
- if (!rt) {
+ dest_dst = __ip_vs_dst_check(dest);
+ if (likely(dest_dst))
+ rt = (struct rt6_info *) dest_dst->dst_cache;
+ else {
u32 cookie;
+ dest_dst = ip_vs_dest_dst_alloc();
+ spin_lock_bh(&dest->dst_lock);
+ if (!dest_dst) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ goto err_unreach;
+ }
dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
- &dest->dst_saddr.in6,
+ &dest_dst->dst_saddr.in6,
do_xfrm);
if (!dst) {
- spin_unlock(&dest->dst_lock);
- return NULL;
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ ip_vs_dest_dst_free(dest_dst);
+ goto err_unreach;
}
rt = (struct rt6_info *) dst;
cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
- __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
+ spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
- &dest->addr.in6, &dest->dst_saddr.in6,
+ &dest->addr.in6, &dest_dst->dst_saddr.in6,
atomic_read(&rt->dst.__refcnt));
}
if (ret_saddr)
- *ret_saddr = dest->dst_saddr.in6;
- spin_unlock(&dest->dst_lock);
+ *ret_saddr = dest_dst->dst_saddr.in6;
} else {
+ noref = 0;
dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
if (!dst)
- return NULL;
+ goto err_unreach;
rt = (struct rt6_info *) dst;
}
@@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
rt_mode)) {
IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
local ? "local":"non-local", daddr);
- dst_release(&rt->dst);
- return NULL;
+ goto err_put;
}
- if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
- !((ort = (struct rt6_info *) skb_dst(skb)) &&
- __ip_vs_is_local_route6(ort))) {
- IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
- "requires NAT method, dest: %pI6c\n",
- &ipv6_hdr(skb)->daddr, daddr);
- dst_release(&rt->dst);
- return NULL;
+ if (likely(!local)) {
+ if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
+ IPV6_ADDR_LOOPBACK)) {
+ IP_VS_DBG_RL("Stopping traffic from loopback address "
+ "%pI6c to non-local address, "
+ "dest: %pI6c\n",
+ &ipv6_hdr(skb)->saddr, daddr);
+ goto err_put;
+ }
+ } else {
+ ort = (struct rt6_info *) skb_dst(skb);
+ if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
+ !__ip_vs_is_local_route6(ort)) {
+ IP_VS_DBG_RL("Redirect from non-local address %pI6c "
+ "to local requires NAT method, "
+ "dest: %pI6c\n",
+ &ipv6_hdr(skb)->daddr, daddr);
+ goto err_put;
+ }
+ /* skb to local stack, preserve old route */
+ if (!noref)
+ dst_release(&rt->dst);
+ return local;
}
- if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
- IPV6_ADDR_LOOPBACK)) {
- IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
- "to non-local address, dest: %pI6c\n",
- &ipv6_hdr(skb)->saddr, daddr);
- dst_release(&rt->dst);
- return NULL;
+
+ /* MTU checking */
+ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
+ mtu = dst_mtu(&rt->dst);
+ else {
+ struct sock *sk = skb->sk;
+
+ mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
+ if (mtu < IPV6_MIN_MTU) {
+ IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
+ IPV6_MIN_MTU);
+ goto err_put;
+ }
+ ort = (struct rt6_info *) skb_dst(skb);
+ if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
- return rt;
+ if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+ goto err_put;
+ }
+
+ skb_dst_drop(skb);
+ if (noref) {
+ if (!local)
+ skb_dst_set_noref_force(skb, &rt->dst);
+ else
+ skb_dst_set(skb, dst_clone(&rt->dst));
+ } else
+ skb_dst_set(skb, &rt->dst);
+
+ return local;
+
+err_put:
+ if (!noref)
+ dst_release(&rt->dst);
+ return -1;
+
+err_unreach:
+ dst_link_failure(skb);
+ return -1;
}
#endif
-/*
- * Release dest->dst_cache before a dest is removed
- */
-void
-ip_vs_dst_reset(struct ip_vs_dest *dest)
+/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
+static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
+ struct ip_vs_conn *cp)
{
- struct dst_entry *old_dst;
+ int ret = NF_ACCEPT;
+
+ skb->ipvs_property = 1;
+ if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
+ ret = ip_vs_confirm_conntrack(skb);
+ if (ret == NF_ACCEPT) {
+ nf_reset(skb);
+ skb_forward_csum(skb);
+ }
+ return ret;
+}
+
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
+ struct ip_vs_conn *cp, int local)
+{
+ int ret = NF_STOLEN;
- old_dst = dest->dst_cache;
- dest->dst_cache = NULL;
- dst_release(old_dst);
- dest->dst_saddr.ip = 0;
+ skb->ipvs_property = 1;
+ if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+ ip_vs_notrack(skb);
+ else
+ ip_vs_update_conntrack(skb, cp, 1);
+ if (!local) {
+ skb_forward_csum(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
+ dst_output);
+ } else
+ ret = NF_ACCEPT;
+ return ret;
}
-#define IP_VS_XMIT_TUNNEL(skb, cp) \
-({ \
- int __ret = NF_ACCEPT; \
- \
- (skb)->ipvs_property = 1; \
- if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \
- __ret = ip_vs_confirm_conntrack(skb); \
- if (__ret == NF_ACCEPT) { \
- nf_reset(skb); \
- skb_forward_csum(skb); \
- } \
- __ret; \
-})
-
-#define IP_VS_XMIT_NAT(pf, skb, cp, local) \
-do { \
- (skb)->ipvs_property = 1; \
- if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
- ip_vs_notrack(skb); \
- else \
- ip_vs_update_conntrack(skb, cp, 1); \
- if (local) \
- return NF_ACCEPT; \
- skb_forward_csum(skb); \
- NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
- skb_dst(skb)->dev, dst_output); \
-} while (0)
-
-#define IP_VS_XMIT(pf, skb, cp, local) \
-do { \
- (skb)->ipvs_property = 1; \
- if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
- ip_vs_notrack(skb); \
- if (local) \
- return NF_ACCEPT; \
- skb_forward_csum(skb); \
- NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
- skb_dst(skb)->dev, dst_output); \
-} while (0)
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
+ struct ip_vs_conn *cp, int local)
+{
+ int ret = NF_STOLEN;
+
+ skb->ipvs_property = 1;
+ if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+ ip_vs_notrack(skb);
+ if (!local) {
+ skb_forward_csum(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
+ dst_output);
+ } else
+ ret = NF_ACCEPT;
+ return ret;
+}
/*
@@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
/* we do not touch skb and do not need pskb ptr */
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
}
@@ -443,52 +550,29 @@ int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rtable *rt; /* Route to the other host */
struct iphdr *iph = ip_hdr(skb);
- int mtu;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
- IP_VS_RT_MODE_NON_LOCAL, NULL)))
- goto tx_error_icmp;
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
- !skb_is_gso(skb)) {
- ip_rt_put(rt);
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
+ NULL) < 0)
goto tx_error;
- }
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
- ip_rt_put(rt);
- return NF_STOLEN;
- }
- ip_send_check(ip_hdr(skb));
-
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
+ ip_send_check(iph);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rt6_info *rt; /* Route to the other host */
- int mtu;
-
EnterFunction(10);
- rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
- IP_VS_RT_MODE_NON_LOCAL);
- if (!rt)
- goto tx_error_icmp;
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- dst_release(&rt->dst);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+ ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
- }
-
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(skb == NULL)) {
- dst_release(&rt->dst);
- return NF_STOLEN;
- }
-
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rtable *rt; /* Route to the other host */
- int mtu;
- struct iphdr *iph = ip_hdr(skb);
- int local;
+ int local, rc, was_input;
EnterFunction(10);
+ rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
- p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+
+ p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
ip_vs_conn_fill_cport(cp, *p);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(iph->tos),
- IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR, NULL)))
- goto tx_error_icmp;
- local = rt->rt_flags & RTCF_LOCAL;
+ was_input = rt_is_input_route(skb_rtable(skb));
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR, NULL);
+ if (local < 0)
+ goto tx_error;
+ rt = skb_rtable(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -600,57 +652,31 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
"ip_vs_nat_xmit(): "
"stopping DNAT to local address");
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
/* From world but DNAT to loopback address? */
- if (local && ipv4_is_loopback(cp->daddr.ip) &&
- rt_is_input_route(skb_rtable(skb))) {
+ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
"stopping DNAT to loopback address");
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
- !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
- "ip_vs_nat_xmit(): frag needed for");
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, sizeof(struct iphdr)))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
- goto tx_error_put;
+ goto tx_error;
ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
- if (!local) {
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- ip_rt_put(rt);
- /*
- * Some IPv4 replies get local address from routes,
- * not from iph, so while we DNAT after routing
- * we need this second input/output route.
- */
- if (!__ip_vs_reroute_locally(skb))
- goto tx_error;
- }
-
IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
@@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
+ rcu_read_unlock();
LeaveFunction(10);
- return NF_STOLEN;
+ return rc;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_put:
- ip_rt_put(rt);
- goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rt6_info *rt; /* Route to the other host */
- int mtu;
- int local;
+ int local, rc;
EnterFunction(10);
+ rcu_read_lock();
/* check if it is a connection of no-client-port */
- if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
- p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
+ p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
ip_vs_conn_fill_cport(cp, *p);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- 0, (IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR))))
- goto tx_error_icmp;
- local = __ip_vs_is_local_route6(rt);
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+ ipvsh, 0,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR);
+ if (local < 0)
+ goto tx_error;
+ rt = (struct rt6_info *) skb_dst(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address");
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
@@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address");
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
- "ip_vs_nat_xmit_v6(): frag needed for");
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
/* mangle the packet */
- if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6;
- if (!local || !skb->dev) {
- /* drop the old route when skb is not shared */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- /* destined to loopback, do we need to change route? */
- dst_release(&rt->dst);
- }
-
IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
@@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
+ rcu_read_unlock();
LeaveFunction(10);
- return NF_STOLEN;
+ return rc;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
LeaveFunction(10);
kfree_skb(skb);
+ rcu_read_unlock();
return NF_STOLEN;
-tx_error_put:
- dst_release(&rt->dst);
- goto tx_error;
}
#endif
@@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
__be16 df;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
- int mtu;
- int ret;
+ int ret, local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_CONNECT,
- &saddr)))
- goto tx_error_icmp;
- if (rt->rt_flags & RTCF_LOCAL) {
- ip_rt_put(rt);
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_CONNECT |
+ IP_VS_RT_MODE_TUNNEL, &saddr);
+ if (local < 0)
+ goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
}
+ rt = skb_rtable(skb);
tdev = rt->dst.dev;
- mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
- if (mtu < 68) {
- IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
- goto tx_error_put;
- }
- if (rt_is_output_route(skb_rtable(skb)))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
/* Copy DF, reset fragment offset and MF */
df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
- if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
- }
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
- if (skb_headroom(skb) < max_headroom
- || skb_cloned(skb) || skb_shared(skb)) {
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
struct sk_buff *new_skb =
skb_realloc_headroom(skb, max_headroom);
- if (!new_skb) {
- ip_rt_put(rt);
- kfree_skb(skb);
- IP_VS_ERR_RL("%s(): no memory\n", __func__);
- return NF_STOLEN;
- }
+
+ if (!new_skb)
+ goto tx_error;
consume_skb(skb);
skb = new_skb;
old_iph = ip_hdr(skb);
@@ -890,10 +871,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/*
* Push down and install the IPIP header.
*/
@@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- ret = IP_VS_XMIT_TUNNEL(skb, cp);
+ ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
ip_local_out(skb);
else if (ret == NF_DROP)
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_put:
- ip_rt_put(rt);
- goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
@@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ipv6hdr *old_iph = ipv6_hdr(skb);
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
- int mtu;
- int ret;
+ int ret, local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
- &saddr, 1, (IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL))))
- goto tx_error_icmp;
- if (__ip_vs_is_local_route6(rt)) {
- dst_release(&rt->dst);
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+ &saddr, ipvsh, 1,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_TUNNEL);
+ if (local < 0)
+ goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
}
+ rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
- mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
- if (mtu < IPV6_MIN_MTU) {
- IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
- IPV6_MIN_MTU);
- goto tx_error_put;
- }
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
- /* MTU checking: Notice that 'mtu' have been adjusted before hand */
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
- }
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
- if (skb_headroom(skb) < max_headroom
- || skb_cloned(skb) || skb_shared(skb)) {
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
struct sk_buff *new_skb =
skb_realloc_headroom(skb, max_headroom);
- if (!new_skb) {
- dst_release(&rt->dst);
- kfree_skb(skb);
- IP_VS_ERR_RL("%s(): no memory\n", __func__);
- return NF_STOLEN;
- }
+
+ if (!new_skb)
+ goto tx_error;
consume_skb(skb);
skb = new_skb;
old_iph = ipv6_hdr(skb);
@@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/*
* Push down and install the IPIP header.
*/
@@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- ret = IP_VS_XMIT_TUNNEL(skb, cp);
+ ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
ip6_local_out(skb);
else if (ret == NF_DROP)
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_put:
- dst_release(&rt->dst);
- goto tx_error;
}
#endif
@@ -1060,59 +1004,36 @@ int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = ip_hdr(skb);
- int mtu;
+ int local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(iph->tos),
- IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_KNOWN_NH, NULL)))
- goto tx_error_icmp;
- if (rt->rt_flags & RTCF_LOCAL) {
- ip_rt_put(rt);
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
- !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- ip_rt_put(rt);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH, NULL);
+ if (local < 0)
goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
}
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
- ip_rt_put(rt);
- return NF_STOLEN;
- }
ip_send_check(ip_hdr(skb));
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rt6_info *rt; /* Route to the other host */
- int mtu;
+ int local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- 0, (IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL))))
- goto tx_error_icmp;
- if (__ip_vs_is_local_route6(rt)) {
- dst_release(&rt->dst);
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- dst_release(&rt->dst);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+ ipvsh, 0,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL);
+ if (local < 0)
goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
}
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(skb == NULL)) {
- dst_release(&rt->dst);
- return NF_STOLEN;
- }
-
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_iphdr *iph)
{
struct rtable *rt; /* Route to the other host */
- int mtu;
int rc;
int local;
- int rt_mode;
+ int rt_mode, was_input;
EnterFunction(10);
@@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/*
* mangle and send the packet here (only for VS/NAT)
*/
+ was_input = rt_is_input_route(skb_rtable(skb));
/* LOCALNODE from FORWARD hook is not supported */
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(ip_hdr(skb)->tos),
- rt_mode, NULL)))
- goto tx_error_icmp;
- local = rt->rt_flags & RTCF_LOCAL;
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
+ if (local < 0)
+ goto tx_error;
+ rt = skb_rtable(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
@@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI4\n",
__func__, &cp->daddr.ip);
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
/* From world but DNAT to loopback address? */
- if (local && ipv4_is_loopback(cp->daddr.ip) &&
- rt_is_input_route(skb_rtable(skb))) {
+ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI4\n",
__func__, &cp->daddr.ip);
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
- !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, offset))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
ip_vs_nat_icmp(skb, pp, cp, 0);
- if (!local) {
- /* drop the old route when skb is not shared */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- ip_rt_put(rt);
- /*
- * Some IPv4 replies get local address from routes,
- * not from iph, so while we DNAT after routing
- * we need this second input/output route.
- */
- if (!__ip_vs_reroute_locally(skb))
- goto tx_error;
- }
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
-
- rc = NF_STOLEN;
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
+ rcu_read_unlock();
goto out;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
- dev_kfree_skb(skb);
+ kfree_skb(skb);
+ rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
return rc;
- tx_error_put:
- ip_rt_put(rt);
- goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
- struct ip_vs_iphdr *iph)
+ struct ip_vs_iphdr *ipvsh)
{
struct rt6_info *rt; /* Route to the other host */
- int mtu;
int rc;
int local;
int rt_mode;
@@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
translate address/port back */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
if (cp->packet_xmit)
- rc = cp->packet_xmit(skb, cp, pp, iph);
+ rc = cp->packet_xmit(skb, cp, pp, ipvsh);
else
rc = NF_ACCEPT;
/* do not touch skb anymore */
@@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- 0, rt_mode)))
- goto tx_error_icmp;
-
- local = __ip_vs_is_local_route6(rt);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+ ipvsh, 0, rt_mode);
+ if (local < 0)
+ goto tx_error;
+ rt = (struct rt6_info *) skb_dst(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI6\n",
__func__, &cp->daddr.in6);
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
@@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI6\n",
__func__, &cp->daddr.in6);
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, offset))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
ip_vs_nat_icmp_v6(skb, pp, cp, 0);
- if (!local || !skb->dev) {
- /* drop the old route when skb is not shared */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- /* destined to loopback, do we need to change route? */
- dst_release(&rt->dst);
- }
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
-
- rc = NF_STOLEN;
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
+ rcu_read_unlock();
goto out;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
- dev_kfree_skb(skb);
+ kfree_skb(skb);
+ rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
return rc;
-tx_error_put:
- dst_release(&rt->dst);
- goto tx_error;
}
#endif
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index dbdaa1149260..b8b95f4027ca 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -2,6 +2,7 @@
*
* (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
* based on HW's ip_conntrack_irc.c as well as other modules
+ * (C) 2006 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c8e001a9c45b..0283baedcdfb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -5,6 +5,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -48,6 +49,7 @@
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
#define NF_CONNTRACK_VERSION "0.5.0"
@@ -264,7 +266,7 @@ static void death_by_event(unsigned long ul_conntrack)
if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
/* bad luck, let's retry again */
ecache->timeout.expires = jiffies +
- (random32() % net->ct.sysctl_events_retry_timeout);
+ (prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
return;
}
@@ -283,7 +285,7 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
/* set a new timer to retry event delivery */
setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
ecache->timeout.expires = jiffies +
- (random32() % net->ct.sysctl_events_retry_timeout);
+ (prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
}
EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
@@ -1259,7 +1261,7 @@ void nf_ct_iterate_cleanup(struct net *net,
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
struct __nf_ct_flush_report {
- u32 pid;
+ u32 portid;
int report;
};
@@ -1274,7 +1276,7 @@ static int kill_report(struct nf_conn *i, void *data)
/* If we fail to deliver the event, death_by_timeout() will retry */
if (nf_conntrack_event_report(IPCT_DESTROY, i,
- fr->pid, fr->report) < 0)
+ fr->portid, fr->report) < 0)
return 1;
/* Avoid the delivery of the destroy event in death_by_timeout(). */
@@ -1297,10 +1299,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
-void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
+void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
{
struct __nf_ct_flush_report fr = {
- .pid = pid,
+ .portid = portid,
.report = report,
};
nf_ct_iterate_cleanup(net, kill_report, &fr);
@@ -1364,30 +1366,48 @@ void nf_conntrack_cleanup_end(void)
*/
void nf_conntrack_cleanup_net(struct net *net)
{
+ LIST_HEAD(single);
+
+ list_add(&net->exit_list, &single);
+ nf_conntrack_cleanup_net_list(&single);
+}
+
+void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
+{
+ int busy;
+ struct net *net;
+
/*
* This makes sure all current packets have passed through
* netfilter framework. Roll on, two-stage module
* delete...
*/
synchronize_net();
- i_see_dead_people:
- nf_ct_iterate_cleanup(net, kill_all, NULL);
- nf_ct_release_dying_list(net);
- if (atomic_read(&net->ct.count) != 0) {
+i_see_dead_people:
+ busy = 0;
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ nf_ct_iterate_cleanup(net, kill_all, NULL);
+ nf_ct_release_dying_list(net);
+ if (atomic_read(&net->ct.count) != 0)
+ busy = 1;
+ }
+ if (busy) {
schedule();
goto i_see_dead_people;
}
- nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
- nf_conntrack_proto_pernet_fini(net);
- nf_conntrack_helper_pernet_fini(net);
- nf_conntrack_ecache_pernet_fini(net);
- nf_conntrack_tstamp_pernet_fini(net);
- nf_conntrack_acct_pernet_fini(net);
- nf_conntrack_expect_pernet_fini(net);
- kmem_cache_destroy(net->ct.nf_conntrack_cachep);
- kfree(net->ct.slabname);
- free_percpu(net->ct.stat);
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
+ nf_conntrack_proto_pernet_fini(net);
+ nf_conntrack_helper_pernet_fini(net);
+ nf_conntrack_ecache_pernet_fini(net);
+ nf_conntrack_tstamp_pernet_fini(net);
+ nf_conntrack_acct_pernet_fini(net);
+ nf_conntrack_expect_pernet_fini(net);
+ kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+ kfree(net->ct.slabname);
+ free_percpu(net->ct.stat);
+ }
}
void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index b5d2eb8bf0d5..1df176146567 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -1,8 +1,10 @@
/* Event cache for netfilter. */
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+/*
+ * (C) 2005 Harald Welte <laforge@gnumonks.org>
+ * (C) 2005 Patrick McHardy <kaber@trash.net>
+ * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 8c10e3db3d9b..c63b618cd619 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -3,6 +3,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -40,7 +41,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
- u32 pid, int report)
+ u32 portid, int report)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
@@ -54,7 +55,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
hlist_del(&exp->lnode);
master_help->expecting[exp->class]--;
- nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
+ nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
nf_ct_expect_put(exp);
NF_CT_STAT_INC(net, expect_delete);
@@ -412,7 +413,7 @@ out:
}
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
- u32 pid, int report)
+ u32 portid, int report)
{
int ret;
@@ -425,7 +426,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
if (ret < 0)
goto out;
spin_unlock_bh(&nf_conntrack_lock);
- nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
+ nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
return ret;
out:
spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 62fb8faedb80..6b217074237b 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -3,6 +3,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 7df7b36d2e24..bdebd03bc8cd 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -2,6 +2,7 @@
* H.323 connection tracking helper
*
* Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This source code is licensed under General Public License version 2.
*
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 94b4b9853f60..974a2a4adefa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -3,6 +3,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -353,7 +354,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
- nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
+ nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
"nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
va_end(args);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 70985c5d0ffa..0fd2976db7ee 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -1,6 +1,7 @@
/* IRC extension for IP connection tracking, Version 1.21
* (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
* based on RR's ip_conntrack_ftp.c
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 8fe2e99428b7..355d2ef08094 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -45,7 +45,7 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit)
if (test_bit(bit, labels->bits))
return 0;
- if (test_and_set_bit(bit, labels->bits))
+ if (!test_and_set_bit(bit, labels->bits))
nf_conntrack_event_cache(IPCT_LABEL, ct);
return 0;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9904b15f600e..ecf065f94032 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1825,6 +1825,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
(1 << IPCT_HELPER) |
+ (1 << IPCT_LABEL) |
(1 << IPCT_PROTOINFO) |
(1 << IPCT_NATSEQADJ) |
(1 << IPCT_MARK),
@@ -2409,6 +2410,92 @@ out:
return skb->len;
}
+static int
+ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nf_conntrack_expect *exp, *last;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nf_conn *ct = cb->data;
+ struct nf_conn_help *help = nfct_help(ct);
+ u_int8_t l3proto = nfmsg->nfgen_family;
+
+ if (cb->args[0])
+ return 0;
+
+ rcu_read_lock();
+ last = (struct nf_conntrack_expect *)cb->args[1];
+restart:
+ hlist_for_each_entry(exp, &help->expectations, lnode) {
+ if (l3proto && exp->tuple.src.l3num != l3proto)
+ continue;
+ if (cb->args[1]) {
+ if (exp != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ IPCTNL_MSG_EXP_NEW,
+ exp) < 0) {
+ if (!atomic_inc_not_zero(&exp->use))
+ continue;
+ cb->args[1] = (unsigned long)exp;
+ goto out;
+ }
+ }
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
+ }
+ cb->args[0] = 1;
+out:
+ rcu_read_unlock();
+ if (last)
+ nf_ct_expect_put(last);
+
+ return skb->len;
+}
+
+static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
+{
+ int err;
+ struct net *net = sock_net(ctnl);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ u16 zone = 0;
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_exp_ct_dump_table,
+ .done = ctnetlink_exp_done,
+ };
+
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_EXPECT_ZONE]) {
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+ }
+
+ h = nf_conntrack_find_get(net, zone, &tuple);
+ if (!h)
+ return -ENOENT;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ c.data = ct;
+
+ err = netlink_dump_start(ctnl, skb, nlh, &c);
+ nf_ct_put(ct);
+
+ return err;
+}
+
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
[CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
@@ -2439,11 +2526,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .dump = ctnetlink_exp_dump_table,
- .done = ctnetlink_exp_done,
- };
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ if (cda[CTA_EXPECT_MASTER])
+ return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
+ else {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_exp_dump_table,
+ .done = ctnetlink_exp_done,
+ };
+ return netlink_dump_start(ctnl, skb, nlh, &c);
+ }
}
err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index e6678d2b624e..7bd03decd36c 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -11,6 +11,8 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
* Limitations:
* - We blindly assume that control connections are always
* established in PNS->PAC direction. This is a violation
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 58ab4050830c..0ab9636ac57e 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -3,6 +3,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index ba65b2041eb4..a99b6c3427b0 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
out_invalid:
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
+ nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
+ NULL, msg);
return false;
}
@@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid packet ignored ");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid state transition ");
return -NF_ACCEPT;
}
@@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
out_invalid:
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 155ce9f8a0db..9d9c0dade602 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -21,6 +21,7 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*/
#include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index ec83536def9a..1314d33f6bcf 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -1,6 +1,9 @@
/*
* Connection tracking protocol helper module for SCTP.
*
+ * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com>
+ * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net>
+ *
* SCTP is defined in RFC 2960. References to various sections in this code
* are to this RFC.
*
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 83876e9877f1..4d4d8f1d01fc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1,5 +1,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -720,7 +722,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
tn->tcp_be_liberal)
res = true;
if (!res && LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: %s ",
before(seq, sender->td_maxend + 1) ?
after(end, sender->td_end - receiver->td_maxwin - 1) ?
@@ -772,7 +774,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: short packet ");
return -NF_ACCEPT;
}
@@ -780,7 +782,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: truncated/malformed packet ");
return -NF_ACCEPT;
}
@@ -793,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: bad TCP checksum ");
return -NF_ACCEPT;
}
@@ -802,7 +804,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
if (!tcp_valid_flags[tcpflags]) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid TCP flag combination ");
return -NF_ACCEPT;
}
@@ -949,7 +951,7 @@ static int tcp_packet(struct nf_conn *ct,
}
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid packet ignored in "
"state %s ", tcp_conntrack_names[old_state]);
return NF_ACCEPT;
@@ -959,7 +961,7 @@ static int tcp_packet(struct nf_conn *ct,
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
return -NF_ACCEPT;
case TCP_CONNTRACK_CLOSE:
@@ -969,8 +971,8 @@ static int tcp_packet(struct nf_conn *ct,
/* Invalid RST */
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid RST ");
+ nf_log_packet(net, pf, 0, skb, NULL, NULL,
+ NULL, "nf_ct_tcp: invalid RST ");
return -NF_ACCEPT;
}
if (index == TCP_RST_SET
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 59623cc56e8d..9d7721cbce4b 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -1,5 +1,6 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -119,7 +120,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: short packet ");
return -NF_ACCEPT;
}
@@ -127,7 +128,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: truncated/malformed packet ");
return -NF_ACCEPT;
}
@@ -143,7 +144,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: bad UDP checksum ");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index ca969f6273f7..2750e6c69f82 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: short packet ");
return -NF_ACCEPT;
}
@@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
cscov = udplen;
else if (cscov < sizeof(*hdr) || cscov > udplen) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: invalid checksum coverage ");
return -NF_ACCEPT;
}
@@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
/* UDPLITE mandates checksums */
if (!hdr->check) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: checksum missing ");
return -NF_ACCEPT;
}
@@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: bad UDPLite checksum ");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index fedee3943661..bd700b4013c1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,5 +1,6 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -545,16 +546,20 @@ out_init:
return ret;
}
-static void nf_conntrack_pernet_exit(struct net *net)
+static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
{
- nf_conntrack_standalone_fini_sysctl(net);
- nf_conntrack_standalone_fini_proc(net);
- nf_conntrack_cleanup_net(net);
+ struct net *net;
+
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ nf_conntrack_standalone_fini_sysctl(net);
+ nf_conntrack_standalone_fini_proc(net);
+ }
+ nf_conntrack_cleanup_net_list(net_exit_list);
}
static struct pernet_operations nf_conntrack_net_ops = {
- .init = nf_conntrack_pernet_init,
- .exit = nf_conntrack_pernet_exit,
+ .init = nf_conntrack_pernet_init,
+ .exit_batch = nf_conntrack_pernet_exit,
};
static int __init nf_conntrack_standalone_init(void)
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index e9936c830208..e68ab4fbd71f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -1,5 +1,5 @@
/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 9e312695c818..3b18dd1be7d9 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,7 +16,6 @@
#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
-static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
static DEFINE_MUTEX(nf_log_mutex);
@@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
return NULL;
}
+void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+{
+ const struct nf_logger *log;
+
+ if (pf == NFPROTO_UNSPEC)
+ return;
+
+ mutex_lock(&nf_log_mutex);
+ log = rcu_dereference_protected(net->nf.nf_loggers[pf],
+ lockdep_is_held(&nf_log_mutex));
+ if (log == NULL)
+ rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
+
+ mutex_unlock(&nf_log_mutex);
+}
+EXPORT_SYMBOL(nf_log_set);
+
+void nf_log_unset(struct net *net, const struct nf_logger *logger)
+{
+ int i;
+ const struct nf_logger *log;
+
+ mutex_lock(&nf_log_mutex);
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ log = rcu_dereference_protected(net->nf.nf_loggers[i],
+ lockdep_is_held(&nf_log_mutex));
+ if (log == logger)
+ RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
+ }
+ mutex_unlock(&nf_log_mutex);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(nf_log_unset);
+
/* return EEXIST if the same logger is registered, 0 on success. */
int nf_log_register(u_int8_t pf, struct nf_logger *logger)
{
- const struct nf_logger *llog;
int i;
- if (pf >= ARRAY_SIZE(nf_loggers))
+ if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(logger->list); i++)
@@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
} else {
/* register at end of list to honor first register win */
list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
- llog = rcu_dereference_protected(nf_loggers[pf],
- lockdep_is_held(&nf_log_mutex));
- if (llog == NULL)
- rcu_assign_pointer(nf_loggers[pf], logger);
}
mutex_unlock(&nf_log_mutex);
@@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register);
void nf_log_unregister(struct nf_logger *logger)
{
- const struct nf_logger *c_logger;
int i;
mutex_lock(&nf_log_mutex);
- for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
- c_logger = rcu_dereference_protected(nf_loggers[i],
- lockdep_is_held(&nf_log_mutex));
- if (c_logger == logger)
- RCU_INIT_POINTER(nf_loggers[i], NULL);
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
list_del(&logger->list[i]);
- }
mutex_unlock(&nf_log_mutex);
-
- synchronize_rcu();
}
EXPORT_SYMBOL(nf_log_unregister);
-int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
+int nf_log_bind_pf(struct net *net, u_int8_t pf,
+ const struct nf_logger *logger)
{
- if (pf >= ARRAY_SIZE(nf_loggers))
+ if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
return -EINVAL;
mutex_lock(&nf_log_mutex);
if (__find_logger(pf, logger->name) == NULL) {
mutex_unlock(&nf_log_mutex);
return -ENOENT;
}
- rcu_assign_pointer(nf_loggers[pf], logger);
+ rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
mutex_unlock(&nf_log_mutex);
return 0;
}
EXPORT_SYMBOL(nf_log_bind_pf);
-void nf_log_unbind_pf(u_int8_t pf)
+void nf_log_unbind_pf(struct net *net, u_int8_t pf)
{
- if (pf >= ARRAY_SIZE(nf_loggers))
+ if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
return;
mutex_lock(&nf_log_mutex);
- RCU_INIT_POINTER(nf_loggers[pf], NULL);
+ RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL);
mutex_unlock(&nf_log_mutex);
}
EXPORT_SYMBOL(nf_log_unbind_pf);
-void nf_log_packet(u_int8_t pf,
+void nf_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -121,12 +143,12 @@ void nf_log_packet(u_int8_t pf,
const struct nf_logger *logger;
rcu_read_lock();
- logger = rcu_dereference(nf_loggers[pf]);
+ logger = rcu_dereference(net->nf.nf_loggers[pf]);
if (logger) {
va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args);
va_end(args);
- logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
+ logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
}
rcu_read_unlock();
}
@@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet);
#ifdef CONFIG_PROC_FS
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
+
mutex_lock(&nf_log_mutex);
- if (*pos >= ARRAY_SIZE(nf_loggers))
+ if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
return NULL;
return pos;
@@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(s);
+
(*pos)++;
- if (*pos >= ARRAY_SIZE(nf_loggers))
+ if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
return NULL;
return pos;
@@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v)
const struct nf_logger *logger;
struct nf_logger *t;
int ret;
+ struct net *net = seq_file_net(s);
- logger = rcu_dereference_protected(nf_loggers[*pos],
+ logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
lockdep_is_held(&nf_log_mutex));
if (!logger)
@@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = {
static int nflog_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &nflog_seq_ops);
+ return seq_open_net(inode, file, &nflog_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations nflog_file_ops = {
@@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = {
.open = nflog_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
@@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = {
#ifdef CONFIG_SYSCTL
static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
-static struct ctl_table_header *nf_log_dir_header;
static int nf_log_proc_dostring(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
size_t size = *lenp;
int r = 0;
int tindex = (unsigned long)table->extra1;
+ struct net *net = current->nsproxy->net_ns;
if (write) {
if (size > sizeof(buf))
@@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
return -EFAULT;
if (!strcmp(buf, "NONE")) {
- nf_log_unbind_pf(tindex);
+ nf_log_unbind_pf(net, tindex);
return 0;
}
mutex_lock(&nf_log_mutex);
@@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
mutex_unlock(&nf_log_mutex);
return -ENOENT;
}
- rcu_assign_pointer(nf_loggers[tindex], logger);
+ rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex);
} else {
mutex_lock(&nf_log_mutex);
- logger = rcu_dereference_protected(nf_loggers[tindex],
+ logger = rcu_dereference_protected(net->nf.nf_loggers[tindex],
lockdep_is_held(&nf_log_mutex));
if (!logger)
table->data = "NONE";
@@ -260,49 +288,114 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
return r;
}
-static __init int netfilter_log_sysctl_init(void)
+static int netfilter_log_sysctl_init(struct net *net)
{
int i;
-
- for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
- snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i);
- nf_log_sysctl_table[i].procname =
- nf_log_sysctl_fnames[i-NFPROTO_UNSPEC];
- nf_log_sysctl_table[i].data = NULL;
- nf_log_sysctl_table[i].maxlen =
- NFLOGGER_NAME_LEN * sizeof(char);
- nf_log_sysctl_table[i].mode = 0644;
- nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring;
- nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i;
+ struct ctl_table *table;
+
+ table = nf_log_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(nf_log_sysctl_table,
+ sizeof(nf_log_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+ } else {
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
+ snprintf(nf_log_sysctl_fnames[i],
+ 3, "%d", i);
+ nf_log_sysctl_table[i].procname =
+ nf_log_sysctl_fnames[i];
+ nf_log_sysctl_table[i].data = NULL;
+ nf_log_sysctl_table[i].maxlen =
+ NFLOGGER_NAME_LEN * sizeof(char);
+ nf_log_sysctl_table[i].mode = 0644;
+ nf_log_sysctl_table[i].proc_handler =
+ nf_log_proc_dostring;
+ nf_log_sysctl_table[i].extra1 =
+ (void *)(unsigned long) i;
+ }
}
- nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log",
- nf_log_sysctl_table);
- if (!nf_log_dir_header)
- return -ENOMEM;
+ net->nf.nf_log_dir_header = register_net_sysctl(net,
+ "net/netfilter/nf_log",
+ table);
+ if (!net->nf.nf_log_dir_header)
+ goto err_reg;
return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->nf.nf_log_dir_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf.nf_log_dir_header);
+ if (!net_eq(net, &init_net))
+ kfree(table);
}
#else
-static __init int netfilter_log_sysctl_init(void)
+static int netfilter_log_sysctl_init(struct net *net)
{
return 0;
}
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+}
#endif /* CONFIG_SYSCTL */
-int __init netfilter_log_init(void)
+static int __net_init nf_log_net_init(struct net *net)
{
- int i, r;
+ int ret = -ENOMEM;
+
#ifdef CONFIG_PROC_FS
if (!proc_create("nf_log", S_IRUGO,
- proc_net_netfilter, &nflog_file_ops))
- return -1;
+ net->nf.proc_netfilter, &nflog_file_ops))
+ return ret;
+#endif
+ ret = netfilter_log_sysctl_init(net);
+ if (ret < 0)
+ goto out_sysctl;
+
+ return 0;
+
+out_sysctl:
+#ifdef CONFIG_PROC_FS
+ /* For init_net: errors will trigger panic, don't unroll on error. */
+ if (!net_eq(net, &init_net))
+ remove_proc_entry("nf_log", net->nf.proc_netfilter);
#endif
+ return ret;
+}
+
+static void __net_exit nf_log_net_exit(struct net *net)
+{
+ netfilter_log_sysctl_exit(net);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nf_log", net->nf.proc_netfilter);
+#endif
+}
- /* Errors will trigger panic, unroll on error is unnecessary. */
- r = netfilter_log_sysctl_init();
- if (r < 0)
- return r;
+static struct pernet_operations nf_log_net_ops = {
+ .init = nf_log_net_init,
+ .exit = nf_log_net_exit,
+};
+
+int __init netfilter_log_init(void)
+{
+ int i, ret;
+
+ ret = register_pernet_subsys(&nf_log_net_ops);
+ if (ret < 0)
+ return ret;
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
INIT_LIST_HEAD(&(nf_loggers_l[i]));
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index 3b67c9d11273..eb772380a202 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -1,6 +1,7 @@
/* Amanda extension for TCP NAT alteration.
* (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
* based on a copy of HW's ip_nat_irc.c as well as other modules
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index ad24be070e53..038eee5c8f85 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -87,9 +87,11 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
struct flowi fl;
unsigned int hh_len;
struct dst_entry *dst;
+ int err;
- if (xfrm_decode_session(skb, &fl, family) < 0)
- return -1;
+ err = xfrm_decode_session(skb, &fl, family);
+ if (err < 0)
+ return err;
dst = skb_dst(skb);
if (dst->xfrm)
@@ -98,7 +100,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
if (IS_ERR(dst))
- return -1;
+ return PTR_ERR(dst);
skb_dst_drop(skb);
skb_dst_set(skb, dst);
@@ -107,7 +109,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
hh_len = skb_dst(skb)->dev->hard_header_len;
if (skb_headroom(skb) < hh_len &&
pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
- return -1;
+ return -ENOMEM;
return 0;
}
EXPORT_SYMBOL(nf_xfrm_me_harder);
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 23c2b38676a6..5fea563afe30 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -2,6 +2,7 @@
*
* (C) 2000-2002 Harald Welte <laforge@netfilter.org>
* (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2007-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index e64faa5ca893..396e55d46f90 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb,
{
struct sk_buff *frag;
sctp_sctphdr_t *hdr;
- __be32 crc32;
+ __u32 crc32;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
@@ -55,8 +55,7 @@ sctp_manip_pkt(struct sk_buff *skb,
skb_walk_frags(skb, frag)
crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
crc32);
- crc32 = sctp_end_cksum(crc32);
- hdr->checksum = crc32;
+ hdr->checksum = sctp_end_cksum(crc32);
return true;
}
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 96ccdf78a29f..dac11f73868e 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -230,9 +230,10 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
&ct->tuplehash[!dir].tuple.src.u3,
false);
if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
- poff, plen, buffer, buflen))
+ poff, plen, buffer, buflen)) {
nf_ct_helper_log(skb, ct, "cannot mangle received");
return NF_DROP;
+ }
}
/* The rport= parameter (RFC 3581) contains the port number
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d812c1235b30..5d24b1fdb593 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -1,3 +1,8 @@
+/*
+ * Rusty Russell (C)2000 -- This code is GPL.
+ * Patrick McHardy (c) 2006-2012
+ */
+
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
@@ -40,7 +45,7 @@ void nf_unregister_queue_handler(void)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
-static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
/* Release those devices we held, or Alexey will kill me. */
if (entry->indev)
@@ -60,12 +65,41 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
/* Drop reference to owner of hook which queued us. */
module_put(entry->elem->owner);
}
+EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
+
+/* Bump dev refs so they don't vanish while packet is out */
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+{
+ if (!try_module_get(entry->elem->owner))
+ return false;
+
+ if (entry->indev)
+ dev_hold(entry->indev);
+ if (entry->outdev)
+ dev_hold(entry->outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (entry->skb->nf_bridge) {
+ struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+ struct net_device *physdev;
+
+ physdev = nf_bridge->physindev;
+ if (physdev)
+ dev_hold(physdev);
+ physdev = nf_bridge->physoutdev;
+ if (physdev)
+ dev_hold(physdev);
+ }
+#endif
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
/*
* Any packet that leaves via this function must come back
* through nf_reinject().
*/
-static int __nf_queue(struct sk_buff *skb,
+int nf_queue(struct sk_buff *skb,
struct nf_hook_ops *elem,
u_int8_t pf, unsigned int hook,
struct net_device *indev,
@@ -75,10 +109,6 @@ static int __nf_queue(struct sk_buff *skb,
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
-#ifdef CONFIG_BRIDGE_NETFILTER
- struct net_device *physindev;
- struct net_device *physoutdev;
-#endif
const struct nf_afinfo *afinfo;
const struct nf_queue_handler *qh;
@@ -109,28 +139,13 @@ static int __nf_queue(struct sk_buff *skb,
.indev = indev,
.outdev = outdev,
.okfn = okfn,
+ .size = sizeof(*entry) + afinfo->route_key_size,
};
- /* If it's going away, ignore hook. */
- if (!try_module_get(entry->elem->owner)) {
+ if (!nf_queue_entry_get_refs(entry)) {
status = -ECANCELED;
goto err_unlock;
}
- /* Bump dev refs so they don't vanish while packet is out */
- if (indev)
- dev_hold(indev);
- if (outdev)
- dev_hold(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- physindev = skb->nf_bridge->physindev;
- if (physindev)
- dev_hold(physindev);
- physoutdev = skb->nf_bridge->physoutdev;
- if (physoutdev)
- dev_hold(physoutdev);
- }
-#endif
skb_dst_force(skb);
afinfo->saveroute(skb, entry);
status = qh->outfn(entry, queuenum);
@@ -151,87 +166,6 @@ err:
return status;
}
-#ifdef CONFIG_BRIDGE_NETFILTER
-/* When called from bridge netfilter, skb->data must point to MAC header
- * before calling skb_gso_segment(). Else, original MAC header is lost
- * and segmented skbs will be sent to wrong destination.
- */
-static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
-{
- if (skb->nf_bridge)
- __skb_push(skb, skb->network_header - skb->mac_header);
-}
-
-static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
-{
- if (skb->nf_bridge)
- __skb_pull(skb, skb->network_header - skb->mac_header);
-}
-#else
-#define nf_bridge_adjust_skb_data(s) do {} while (0)
-#define nf_bridge_adjust_segmented_data(s) do {} while (0)
-#endif
-
-int nf_queue(struct sk_buff *skb,
- struct nf_hook_ops *elem,
- u_int8_t pf, unsigned int hook,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- unsigned int queuenum)
-{
- struct sk_buff *segs;
- int err = -EINVAL;
- unsigned int queued;
-
- if (!skb_is_gso(skb))
- return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
- queuenum);
-
- switch (pf) {
- case NFPROTO_IPV4:
- skb->protocol = htons(ETH_P_IP);
- break;
- case NFPROTO_IPV6:
- skb->protocol = htons(ETH_P_IPV6);
- break;
- }
-
- nf_bridge_adjust_skb_data(skb);
- segs = skb_gso_segment(skb, 0);
- /* Does not use PTR_ERR to limit the number of error codes that can be
- * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
- * 'ignore this hook'.
- */
- if (IS_ERR(segs))
- goto out_err;
- queued = 0;
- err = 0;
- do {
- struct sk_buff *nskb = segs->next;
-
- segs->next = NULL;
- if (err == 0) {
- nf_bridge_adjust_segmented_data(segs);
- err = __nf_queue(segs, elem, pf, hook, indev,
- outdev, okfn, queuenum);
- }
- if (err == 0)
- queued++;
- else
- kfree_skb(segs);
- segs = nskb;
- } while (segs);
-
- if (queued) {
- kfree_skb(skb);
- return 0;
- }
- out_err:
- nf_bridge_adjust_segmented_data(skb);
- return err;
-}
-
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
struct sk_buff *skb = entry->skb;
@@ -271,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
local_bh_enable();
break;
case NF_QUEUE:
- err = __nf_queue(skb, elem, entry->pf, entry->hook,
- entry->indev, entry->outdev, entry->okfn,
- verdict >> NF_VERDICT_QBITS);
+ err = nf_queue(skb, elem, entry->pf, entry->hook,
+ entry->indev, entry->outdev, entry->okfn,
+ verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0b1b32cda307..572d87dc116f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -24,10 +24,9 @@
#include <linux/skbuff.h>
#include <asm/uaccess.h>
#include <net/sock.h>
-#include <net/netlink.h>
#include <linux/init.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
MODULE_LICENSE("GPL");
@@ -113,22 +112,30 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
+struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
+ u32 dst_portid, gfp_t gfp_mask)
+{
+ return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
+
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags)
{
- return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags);
+ return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags);
}
EXPORT_SYMBOL_GPL(nfnetlink_send);
-int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
+int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error)
{
- return netlink_set_err(net->nfnl, pid, group, error);
+ return netlink_set_err(net->nfnl, portid, group, error);
}
EXPORT_SYMBOL_GPL(nfnetlink_set_err);
-int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags)
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
+ int flags)
{
- return netlink_unicast(net->nfnl, skb, pid, flags);
+ return netlink_unicast(net->nfnl, skb, portid, flags);
}
EXPORT_SYMBOL_GPL(nfnetlink_unicast);
@@ -144,7 +151,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EPERM;
/* All the messages must at least contain nfgenmsg */
- if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg)))
+ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
return 0;
type = nlh->nlmsg_type;
@@ -172,7 +179,7 @@ replay:
}
{
- int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
struct nlattr *attr = (void *)nlh + min_len;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index dc3fd5d44464..c7b6d466a662 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -149,9 +149,12 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
- if (last && cur != last)
- continue;
+ if (last) {
+ if (cur != last)
+ continue;
+ last = NULL;
+ }
if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 701c88a20fea..65074dfb9383 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -220,9 +220,12 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
list_for_each_entry_rcu(cur, &cttimeout_list, head) {
- if (last && cur != last)
- continue;
+ if (last) {
+ if (cur != last)
+ continue;
+ last = NULL;
+ }
if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index f248db572972..962e9792e317 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -3,6 +3,7 @@
* nfetlink.
*
* (C) 2005 by Harald Welte <laforge@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* Based on the old ipv4-only ipt_ULOG.c:
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
@@ -19,7 +20,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
#include <linux/spinlock.h>
@@ -32,6 +33,7 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
@@ -56,6 +58,7 @@ struct nfulnl_instance {
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
struct timer_list timer;
+ struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
int peer_portid; /* PORTID of the peer process */
@@ -71,25 +74,34 @@ struct nfulnl_instance {
struct rcu_head rcu;
};
-static DEFINE_SPINLOCK(instances_lock);
-static atomic_t global_seq;
-
#define INSTANCE_BUCKETS 16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
static unsigned int hash_init;
+static int nfnl_log_net_id __read_mostly;
+
+struct nfnl_log_net {
+ spinlock_t instances_lock;
+ struct hlist_head instance_table[INSTANCE_BUCKETS];
+ atomic_t global_seq;
+};
+
+static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
+{
+ return net_generic(net, nfnl_log_net_id);
+}
+
static inline u_int8_t instance_hashfn(u_int16_t group_num)
{
return ((group_num & 0xff) % INSTANCE_BUCKETS);
}
static struct nfulnl_instance *
-__instance_lookup(u_int16_t group_num)
+__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
{
struct hlist_head *head;
struct nfulnl_instance *inst;
- head = &instance_table[instance_hashfn(group_num)];
+ head = &log->instance_table[instance_hashfn(group_num)];
hlist_for_each_entry_rcu(inst, head, hlist) {
if (inst->group_num == group_num)
return inst;
@@ -104,12 +116,12 @@ instance_get(struct nfulnl_instance *inst)
}
static struct nfulnl_instance *
-instance_lookup_get(u_int16_t group_num)
+instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
{
struct nfulnl_instance *inst;
rcu_read_lock_bh();
- inst = __instance_lookup(group_num);
+ inst = __instance_lookup(log, group_num);
if (inst && !atomic_inc_not_zero(&inst->use))
inst = NULL;
rcu_read_unlock_bh();
@@ -119,7 +131,11 @@ instance_lookup_get(u_int16_t group_num)
static void nfulnl_instance_free_rcu(struct rcu_head *head)
{
- kfree(container_of(head, struct nfulnl_instance, rcu));
+ struct nfulnl_instance *inst =
+ container_of(head, struct nfulnl_instance, rcu);
+
+ put_net(inst->net);
+ kfree(inst);
module_put(THIS_MODULE);
}
@@ -133,13 +149,15 @@ instance_put(struct nfulnl_instance *inst)
static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
+instance_create(struct net *net, u_int16_t group_num,
+ int portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
int err;
- spin_lock_bh(&instances_lock);
- if (__instance_lookup(group_num)) {
+ spin_lock_bh(&log->instances_lock);
+ if (__instance_lookup(log, group_num)) {
err = -EEXIST;
goto out_unlock;
}
@@ -163,6 +181,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
+ inst->net = get_net(net);
inst->peer_user_ns = user_ns;
inst->peer_portid = portid;
inst->group_num = group_num;
@@ -174,14 +193,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
inst->copy_range = NFULNL_COPY_RANGE_MAX;
hlist_add_head_rcu(&inst->hlist,
- &instance_table[instance_hashfn(group_num)]);
+ &log->instance_table[instance_hashfn(group_num)]);
- spin_unlock_bh(&instances_lock);
+
+ spin_unlock_bh(&log->instances_lock);
return inst;
out_unlock:
- spin_unlock_bh(&instances_lock);
+ spin_unlock_bh(&log->instances_lock);
return ERR_PTR(err);
}
@@ -210,11 +230,12 @@ __instance_destroy(struct nfulnl_instance *inst)
}
static inline void
-instance_destroy(struct nfulnl_instance *inst)
+instance_destroy(struct nfnl_log_net *log,
+ struct nfulnl_instance *inst)
{
- spin_lock_bh(&instances_lock);
+ spin_lock_bh(&log->instances_lock);
__instance_destroy(inst);
- spin_unlock_bh(&instances_lock);
+ spin_unlock_bh(&log->instances_lock);
}
static int
@@ -298,7 +319,7 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
}
static struct sk_buff *
-nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
+nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
{
struct sk_buff *skb;
unsigned int n;
@@ -307,13 +328,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
- skb = alloc_skb(n, GFP_ATOMIC);
+ skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
* packet */
- skb = alloc_skb(pkt_size, GFP_ATOMIC);
+ skb = nfnetlink_alloc_skb(&init_net, pkt_size,
+ peer_portid, GFP_ATOMIC);
if (!skb)
pr_err("nfnetlink_log: can't even alloc %u bytes\n",
pkt_size);
@@ -336,7 +358,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
if (!nlh)
goto out;
}
- status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid,
+ status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
MSG_DONTWAIT);
inst->qlen = 0;
@@ -370,7 +392,8 @@ nfulnl_timer(unsigned long data)
/* This is an inline function, we don't really care about a long
* list of arguments */
static inline int
-__build_packet_message(struct nfulnl_instance *inst,
+__build_packet_message(struct nfnl_log_net *log,
+ struct nfulnl_instance *inst,
const struct sk_buff *skb,
unsigned int data_len,
u_int8_t pf,
@@ -536,7 +559,7 @@ __build_packet_message(struct nfulnl_instance *inst,
/* global sequence number */
if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
- htonl(atomic_inc_return(&global_seq))))
+ htonl(atomic_inc_return(&log->global_seq))))
goto nla_put_failure;
if (data_len) {
@@ -579,7 +602,8 @@ static struct nf_loginfo default_loginfo = {
/* log handler for internal netfilter logging api */
void
-nfulnl_log_packet(u_int8_t pf,
+nfulnl_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -592,13 +616,14 @@ nfulnl_log_packet(u_int8_t pf,
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int plen;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
else
li = &default_loginfo;
- inst = instance_lookup_get(li->u.ulog.group);
+ inst = instance_lookup_get(log, li->u.ulog.group);
if (!inst)
return;
@@ -609,7 +634,7 @@ nfulnl_log_packet(u_int8_t pf,
/* FIXME: do we want to make the size calculation conditional based on
* what is actually present? way more branches and checks, but more
* memory efficient... */
- size = NLMSG_SPACE(sizeof(struct nfgenmsg))
+ size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -673,14 +698,15 @@ nfulnl_log_packet(u_int8_t pf,
}
if (!inst->skb) {
- inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size);
+ inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz,
+ size);
if (!inst->skb)
goto alloc_failure;
}
inst->qlen++;
- __build_packet_message(inst, skb, data_len, pf,
+ __build_packet_message(log, inst, skb, data_len, pf,
hooknum, in, out, prefix, plen);
if (inst->qlen >= qthreshold)
@@ -709,24 +735,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
+ struct nfnl_log_net *log = nfnl_log_pernet(n->net);
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
int i;
/* destroy all instances for this portid */
- spin_lock_bh(&instances_lock);
+ spin_lock_bh(&log->instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *t2;
struct nfulnl_instance *inst;
- struct hlist_head *head = &instance_table[i];
+ struct hlist_head *head = &log->instance_table[i];
hlist_for_each_entry_safe(inst, t2, head, hlist) {
- if ((net_eq(n->net, &init_net)) &&
- (n->portid == inst->peer_portid))
+ if (n->portid == inst->peer_portid)
__instance_destroy(inst);
}
}
- spin_unlock_bh(&instances_lock);
+ spin_unlock_bh(&log->instances_lock);
}
return NOTIFY_DONE;
}
@@ -767,6 +793,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
u_int16_t group_num = ntohs(nfmsg->res_id);
struct nfulnl_instance *inst;
struct nfulnl_msg_config_cmd *cmd = NULL;
+ struct net *net = sock_net(ctnl);
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
if (nfula[NFULA_CFG_CMD]) {
@@ -776,14 +804,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
/* Commands without queue context */
switch (cmd->command) {
case NFULNL_CFG_CMD_PF_BIND:
- return nf_log_bind_pf(pf, &nfulnl_logger);
+ return nf_log_bind_pf(net, pf, &nfulnl_logger);
case NFULNL_CFG_CMD_PF_UNBIND:
- nf_log_unbind_pf(pf);
+ nf_log_unbind_pf(net, pf);
return 0;
}
}
- inst = instance_lookup_get(group_num);
+ inst = instance_lookup_get(log, group_num);
if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto out_put;
@@ -797,9 +825,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out_put;
}
- inst = instance_create(group_num,
+ inst = instance_create(net, group_num,
NETLINK_CB(skb).portid,
- sk_user_ns(NETLINK_CB(skb).ssk));
+ sk_user_ns(NETLINK_CB(skb).sk));
if (IS_ERR(inst)) {
ret = PTR_ERR(inst);
goto out;
@@ -811,7 +839,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out;
}
- instance_destroy(inst);
+ instance_destroy(log, inst);
goto out_put;
default:
ret = -ENOTSUPP;
@@ -894,55 +922,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = {
#ifdef CONFIG_PROC_FS
struct iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
-static struct hlist_node *get_first(struct iter_state *st)
+static struct hlist_node *get_first(struct net *net, struct iter_state *st)
{
+ struct nfnl_log_net *log;
if (!st)
return NULL;
+ log = nfnl_log_pernet(net);
+
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
- if (!hlist_empty(&instance_table[st->bucket]))
- return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+ struct hlist_head *head = &log->instance_table[st->bucket];
+
+ if (!hlist_empty(head))
+ return rcu_dereference_bh(hlist_first_rcu(head));
}
return NULL;
}
-static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
+static struct hlist_node *get_next(struct net *net, struct iter_state *st,
+ struct hlist_node *h)
{
h = rcu_dereference_bh(hlist_next_rcu(h));
while (!h) {
+ struct nfnl_log_net *log;
+ struct hlist_head *head;
+
if (++st->bucket >= INSTANCE_BUCKETS)
return NULL;
- h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+ log = nfnl_log_pernet(net);
+ head = &log->instance_table[st->bucket];
+ h = rcu_dereference_bh(hlist_first_rcu(head));
}
return h;
}
-static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
+static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
+ loff_t pos)
{
struct hlist_node *head;
- head = get_first(st);
+ head = get_first(net, st);
if (head)
- while (pos && (head = get_next(st, head)))
+ while (pos && (head = get_next(net, st, head)))
pos--;
return pos ? NULL : head;
}
-static void *seq_start(struct seq_file *seq, loff_t *pos)
+static void *seq_start(struct seq_file *s, loff_t *pos)
__acquires(rcu_bh)
{
rcu_read_lock_bh();
- return get_idx(seq->private, *pos);
+ return get_idx(seq_file_net(s), s->private, *pos);
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
- return get_next(s->private, v);
+ return get_next(seq_file_net(s), s->private, v);
}
static void seq_stop(struct seq_file *s, void *v)
@@ -971,8 +1012,8 @@ static const struct seq_operations nful_seq_ops = {
static int nful_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &nful_seq_ops,
- sizeof(struct iter_state));
+ return seq_open_net(inode, file, &nful_seq_ops,
+ sizeof(struct iter_state));
}
static const struct file_operations nful_file_ops = {
@@ -980,17 +1021,45 @@ static const struct file_operations nful_file_ops = {
.open = nful_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif /* PROC_FS */
-static int __init nfnetlink_log_init(void)
+static int __net_init nfnl_log_net_init(struct net *net)
{
- int i, status = -ENOMEM;
+ unsigned int i;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
for (i = 0; i < INSTANCE_BUCKETS; i++)
- INIT_HLIST_HEAD(&instance_table[i]);
+ INIT_HLIST_HEAD(&log->instance_table[i]);
+ spin_lock_init(&log->instances_lock);
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create("nfnetlink_log", 0440,
+ net->nf.proc_netfilter, &nful_file_ops))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+static void __net_exit nfnl_log_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
+#endif
+}
+
+static struct pernet_operations nfnl_log_net_ops = {
+ .init = nfnl_log_net_init,
+ .exit = nfnl_log_net_exit,
+ .id = &nfnl_log_net_id,
+ .size = sizeof(struct nfnl_log_net),
+};
+
+static int __init nfnetlink_log_init(void)
+{
+ int status = -ENOMEM;
/* it's not really all that important to have a random value, so
* we can do this from the init function, even if there hasn't
@@ -1000,29 +1069,25 @@ static int __init nfnetlink_log_init(void)
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
- printk(KERN_ERR "log: failed to create netlink socket\n");
+ pr_err("log: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
if (status < 0) {
- printk(KERN_ERR "log: failed to register logger\n");
+ pr_err("log: failed to register logger\n");
goto cleanup_subsys;
}
-#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_log", 0440,
- proc_net_netfilter, &nful_file_ops)) {
- status = -ENOMEM;
+ status = register_pernet_subsys(&nfnl_log_net_ops);
+ if (status < 0) {
+ pr_err("log: failed to register pernet ops\n");
goto cleanup_logger;
}
-#endif
return status;
-#ifdef CONFIG_PROC_FS
cleanup_logger:
nf_log_unregister(&nfulnl_logger);
-#endif
cleanup_subsys:
nfnetlink_subsys_unregister(&nfulnl_subsys);
cleanup_netlink_notifier:
@@ -1032,10 +1097,8 @@ cleanup_netlink_notifier:
static void __exit nfnetlink_log_fini(void)
{
+ unregister_pernet_subsys(&nfnl_log_net_ops);
nf_log_unregister(&nfulnl_logger);
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("nfnetlink_log", proc_net_netfilter);
-#endif
nfnetlink_subsys_unregister(&nfulnl_subsys);
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
}
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 42680b2baa11..5352b2d2d5bf 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -30,6 +30,7 @@
#include <linux/list.h>
#include <net/sock.h>
#include <net/netfilter/nf_queue.h>
+#include <net/netns/generic.h>
#include <net/netfilter/nfnetlink_queue.h>
#include <linux/atomic.h>
@@ -66,23 +67,31 @@ struct nfqnl_instance {
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
-static DEFINE_SPINLOCK(instances_lock);
+static int nfnl_queue_net_id __read_mostly;
#define INSTANCE_BUCKETS 16
-static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
+struct nfnl_queue_net {
+ spinlock_t instances_lock;
+ struct hlist_head instance_table[INSTANCE_BUCKETS];
+};
+
+static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
+{
+ return net_generic(net, nfnl_queue_net_id);
+}
static inline u_int8_t instance_hashfn(u_int16_t queue_num)
{
- return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
+ return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}
static struct nfqnl_instance *
-instance_lookup(u_int16_t queue_num)
+instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
{
struct hlist_head *head;
struct nfqnl_instance *inst;
- head = &instance_table[instance_hashfn(queue_num)];
+ head = &q->instance_table[instance_hashfn(queue_num)];
hlist_for_each_entry_rcu(inst, head, hlist) {
if (inst->queue_num == queue_num)
return inst;
@@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_create(u_int16_t queue_num, int portid)
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
+ int portid)
{
struct nfqnl_instance *inst;
unsigned int h;
int err;
- spin_lock(&instances_lock);
- if (instance_lookup(queue_num)) {
+ spin_lock(&q->instances_lock);
+ if (instance_lookup(q, queue_num)) {
err = -EEXIST;
goto out_unlock;
}
@@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid)
}
h = instance_hashfn(queue_num);
- hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
+ hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
return inst;
out_free:
kfree(inst);
out_unlock:
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
return ERR_PTR(err);
}
@@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst)
}
static void
-instance_destroy(struct nfqnl_instance *inst)
+instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
{
- spin_lock(&instances_lock);
+ spin_lock(&q->instances_lock);
__instance_destroy(inst);
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
}
static inline void
@@ -217,14 +227,71 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
spin_unlock_bh(&queue->lock);
}
+static void
+nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
+{
+ int i, j = 0;
+ int plen = 0; /* length of skb->head fragment */
+ struct page *page;
+ unsigned int offset;
+
+ /* dont bother with small payloads */
+ if (len <= skb_tailroom(to)) {
+ skb_copy_bits(from, 0, skb_put(to, len), len);
+ return;
+ }
+
+ if (hlen) {
+ skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
+ len -= hlen;
+ } else {
+ plen = min_t(int, skb_headlen(from), len);
+ if (plen) {
+ page = virt_to_head_page(from->head);
+ offset = from->data - (unsigned char *)page_address(page);
+ __skb_fill_page_desc(to, 0, page, offset, plen);
+ get_page(page);
+ j = 1;
+ len -= plen;
+ }
+ }
+
+ to->truesize += len + plen;
+ to->len += len + plen;
+ to->data_len += len + plen;
+
+ for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+ if (!len)
+ break;
+ skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
+ skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
+ len -= skb_shinfo(to)->frags[j].size;
+ skb_frag_ref(to, j);
+ j++;
+ }
+ skb_shinfo(to)->nr_frags = j;
+}
+
+static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
+{
+ __u32 flags = 0;
+
+ if (packet->ip_summed == CHECKSUM_PARTIAL)
+ flags = NFQA_SKB_CSUMNOTREADY;
+ if (skb_is_gso(packet))
+ flags |= NFQA_SKB_GSO;
+
+ return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
+}
+
static struct sk_buff *
nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
__be32 **packet_id_ptr)
{
- sk_buff_data_t old_tail;
size_t size;
size_t data_len = 0, cap_len = 0;
+ int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg;
@@ -236,7 +303,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct nf_conn *ct = NULL;
enum ip_conntrack_info uninitialized_var(ctinfo);
- size = NLMSG_SPACE(sizeof(struct nfgenmsg))
+ size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -246,8 +313,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
#endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
- + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)
- + nla_total_size(sizeof(u_int32_t))); /* cap_len */
+ + nla_total_size(sizeof(u_int32_t)) /* skbinfo */
+ + nla_total_size(sizeof(u_int32_t)); /* cap_len */
+
+ if (entskb->tstamp.tv64)
+ size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
outdev = entry->outdev;
@@ -257,7 +327,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
break;
case NFQNL_COPY_PACKET:
- if (entskb->ip_summed == CHECKSUM_PARTIAL &&
+ if (!(queue->flags & NFQA_CFG_F_GSO) &&
+ entskb->ip_summed == CHECKSUM_PARTIAL &&
skb_checksum_help(entskb))
return NULL;
@@ -265,7 +336,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (data_len == 0 || data_len > entskb->len)
data_len = entskb->len;
- size += nla_total_size(data_len);
+
+ if (!entskb->head_frag ||
+ skb_headlen(entskb) < L1_CACHE_BYTES ||
+ skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
+ hlen = skb_headlen(entskb);
+
+ if (skb_has_frag_list(entskb))
+ hlen = entskb->len;
+ hlen = min_t(int, data_len, hlen);
+ size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
break;
}
@@ -273,11 +353,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (queue->flags & NFQA_CFG_F_CONNTRACK)
ct = nfqnl_ct_get(entskb, &size, &ctinfo);
- skb = alloc_skb(size, GFP_ATOMIC);
+ skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid,
+ GFP_ATOMIC);
if (!skb)
return NULL;
- old_tail = skb->tail;
nlh = nlmsg_put(skb, 0, 0,
NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
sizeof(struct nfgenmsg), 0);
@@ -382,31 +462,29 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
goto nla_put_failure;
}
+ if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
+ goto nla_put_failure;
+
+ if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+ goto nla_put_failure;
+
+ if (nfqnl_put_packet_info(skb, entskb))
+ goto nla_put_failure;
+
if (data_len) {
struct nlattr *nla;
- int sz = nla_attr_size(data_len);
- if (skb_tailroom(skb) < nla_total_size(data_len)) {
- printk(KERN_WARNING "nf_queue: no tailroom!\n");
- kfree_skb(skb);
- return NULL;
- }
+ if (skb_tailroom(skb) < sizeof(*nla) + hlen)
+ goto nla_put_failure;
- nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
+ nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
nla->nla_type = NFQA_PAYLOAD;
- nla->nla_len = sz;
+ nla->nla_len = nla_attr_size(data_len);
- if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
- BUG();
+ nfqnl_zcopy(skb, entskb, data_len, hlen);
}
- if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
- goto nla_put_failure;
-
- if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
- goto nla_put_failure;
-
- nlh->nlmsg_len = skb->tail - old_tail;
+ nlh->nlmsg_len = skb->len;
return skb;
nla_put_failure:
@@ -416,26 +494,14 @@ nla_put_failure:
}
static int
-nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
+ struct nf_queue_entry *entry)
{
struct sk_buff *nskb;
- struct nfqnl_instance *queue;
int err = -ENOBUFS;
__be32 *packet_id_ptr;
int failopen = 0;
- /* rcu_read_lock()ed by nf_hook_slow() */
- queue = instance_lookup(queuenum);
- if (!queue) {
- err = -ESRCH;
- goto err_out;
- }
-
- if (queue->copy_mode == NFQNL_COPY_NONE) {
- err = -EINVAL;
- goto err_out;
- }
-
nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
if (nskb == NULL) {
err = -ENOMEM;
@@ -462,7 +528,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
*packet_id_ptr = htonl(entry->id);
/* nfnetlink_unicast will either free the nskb or add it to a socket */
- err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT);
+ err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
if (err < 0) {
queue->queue_user_dropped++;
goto err_out_unlock;
@@ -483,6 +549,141 @@ err_out:
return err;
}
+static struct nf_queue_entry *
+nf_queue_entry_dup(struct nf_queue_entry *e)
+{
+ struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
+ if (entry) {
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+ kfree(entry);
+ }
+ return NULL;
+}
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+/* When called from bridge netfilter, skb->data must point to MAC header
+ * before calling skb_gso_segment(). Else, original MAC header is lost
+ * and segmented skbs will be sent to wrong destination.
+ */
+static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
+{
+ if (skb->nf_bridge)
+ __skb_push(skb, skb->network_header - skb->mac_header);
+}
+
+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
+{
+ if (skb->nf_bridge)
+ __skb_pull(skb, skb->network_header - skb->mac_header);
+}
+#else
+#define nf_bridge_adjust_skb_data(s) do {} while (0)
+#define nf_bridge_adjust_segmented_data(s) do {} while (0)
+#endif
+
+static void free_entry(struct nf_queue_entry *entry)
+{
+ nf_queue_entry_release_refs(entry);
+ kfree(entry);
+}
+
+static int
+__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
+ struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ int ret = -ENOMEM;
+ struct nf_queue_entry *entry_seg;
+
+ nf_bridge_adjust_segmented_data(skb);
+
+ if (skb->next == NULL) { /* last packet, no need to copy entry */
+ struct sk_buff *gso_skb = entry->skb;
+ entry->skb = skb;
+ ret = __nfqnl_enqueue_packet(net, queue, entry);
+ if (ret)
+ entry->skb = gso_skb;
+ return ret;
+ }
+
+ skb->next = NULL;
+
+ entry_seg = nf_queue_entry_dup(entry);
+ if (entry_seg) {
+ entry_seg->skb = skb;
+ ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
+ if (ret)
+ free_entry(entry_seg);
+ }
+ return ret;
+}
+
+static int
+nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+{
+ unsigned int queued;
+ struct nfqnl_instance *queue;
+ struct sk_buff *skb, *segs;
+ int err = -ENOBUFS;
+ struct net *net = dev_net(entry->indev ?
+ entry->indev : entry->outdev);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+ /* rcu_read_lock()ed by nf_hook_slow() */
+ queue = instance_lookup(q, queuenum);
+ if (!queue)
+ return -ESRCH;
+
+ if (queue->copy_mode == NFQNL_COPY_NONE)
+ return -EINVAL;
+
+ skb = entry->skb;
+
+ switch (entry->pf) {
+ case NFPROTO_IPV4:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case NFPROTO_IPV6:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ }
+
+ if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
+ return __nfqnl_enqueue_packet(net, queue, entry);
+
+ nf_bridge_adjust_skb_data(skb);
+ segs = skb_gso_segment(skb, 0);
+ /* Does not use PTR_ERR to limit the number of error codes that can be
+ * returned by nf_queue. For instance, callers rely on -ECANCELED to
+ * mean 'ignore this hook'.
+ */
+ if (IS_ERR(segs))
+ goto out_err;
+ queued = 0;
+ err = 0;
+ do {
+ struct sk_buff *nskb = segs->next;
+ if (err == 0)
+ err = __nfqnl_enqueue_packet_gso(net, queue,
+ segs, entry);
+ if (err == 0)
+ queued++;
+ else
+ kfree_skb(segs);
+ segs = nskb;
+ } while (segs);
+
+ if (queued) {
+ if (err) /* some segments are already queued */
+ free_entry(entry);
+ kfree_skb(skb);
+ return 0;
+ }
+ out_err:
+ nf_bridge_adjust_segmented_data(skb);
+ return err;
+}
+
static int
nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
{
@@ -575,15 +776,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
/* drop all packets with either indev or outdev == ifindex from all queue
* instances */
static void
-nfqnl_dev_drop(int ifindex)
+nfqnl_dev_drop(struct net *net, int ifindex)
{
int i;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
- struct hlist_head *head = &instance_table[i];
+ struct hlist_head *head = &q->instance_table[i];
hlist_for_each_entry_rcu(inst, head, hlist)
nfqnl_flush(inst, dev_cmp, ifindex);
@@ -600,12 +802,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this,
{
struct net_device *dev = ptr;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
/* Drop any packets associated with the downed device */
if (event == NETDEV_DOWN)
- nfqnl_dev_drop(dev->ifindex);
+ nfqnl_dev_drop(dev_net(dev), dev->ifindex);
return NOTIFY_DONE;
}
@@ -618,24 +817,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
int i;
/* destroy all instances for this portid */
- spin_lock(&instances_lock);
+ spin_lock(&q->instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *t2;
struct nfqnl_instance *inst;
- struct hlist_head *head = &instance_table[i];
+ struct hlist_head *head = &q->instance_table[i];
hlist_for_each_entry_safe(inst, t2, head, hlist) {
- if ((n->net == &init_net) &&
- (n->portid == inst->peer_portid))
+ if (n->portid == inst->peer_portid)
__instance_destroy(inst);
}
}
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
}
return NOTIFY_DONE;
}
@@ -656,11 +855,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
[NFQA_MARK] = { .type = NLA_U32 },
};
-static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid)
+static struct nfqnl_instance *
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
{
struct nfqnl_instance *queue;
- queue = instance_lookup(queue_num);
+ queue = instance_lookup(q, queue_num);
if (!queue)
return ERR_PTR(-ENODEV);
@@ -704,7 +904,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
LIST_HEAD(batch_list);
u16 queue_num = ntohs(nfmsg->res_id);
- queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
+ struct net *net = sock_net(ctnl);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+ queue = verdict_instance_lookup(q, queue_num,
+ NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
@@ -752,10 +956,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
enum ip_conntrack_info uninitialized_var(ctinfo);
struct nf_conn *ct = NULL;
- queue = instance_lookup(queue_num);
- if (!queue)
+ struct net *net = sock_net(ctnl);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
- queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
+ queue = instance_lookup(q, queue_num);
+ if (!queue)
+ queue = verdict_instance_lookup(q, queue_num,
+ NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
@@ -819,6 +1026,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
u_int16_t queue_num = ntohs(nfmsg->res_id);
struct nfqnl_instance *queue;
struct nfqnl_msg_config_cmd *cmd = NULL;
+ struct net *net = sock_net(ctnl);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int ret = 0;
if (nfqa[NFQA_CFG_CMD]) {
@@ -832,7 +1041,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
rcu_read_lock();
- queue = instance_lookup(queue_num);
+ queue = instance_lookup(q, queue_num);
if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto err_out_unlock;
@@ -845,7 +1054,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -EBUSY;
goto err_out_unlock;
}
- queue = instance_create(queue_num, NETLINK_CB(skb).portid);
+ queue = instance_create(q, queue_num,
+ NETLINK_CB(skb).portid);
if (IS_ERR(queue)) {
ret = PTR_ERR(queue);
goto err_out_unlock;
@@ -856,7 +1066,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -ENODEV;
goto err_out_unlock;
}
- instance_destroy(queue);
+ instance_destroy(q, queue);
break;
case NFQNL_CFG_CMD_PF_BIND:
case NFQNL_CFG_CMD_PF_UNBIND:
@@ -950,19 +1160,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = {
#ifdef CONFIG_PROC_FS
struct iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *get_first(struct seq_file *seq)
{
struct iter_state *st = seq->private;
+ struct net *net;
+ struct nfnl_queue_net *q;
if (!st)
return NULL;
+ net = seq_file_net(seq);
+ q = nfnl_queue_pernet(net);
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
- if (!hlist_empty(&instance_table[st->bucket]))
- return instance_table[st->bucket].first;
+ if (!hlist_empty(&q->instance_table[st->bucket]))
+ return q->instance_table[st->bucket].first;
}
return NULL;
}
@@ -970,13 +1185,17 @@ static struct hlist_node *get_first(struct seq_file *seq)
static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
{
struct iter_state *st = seq->private;
+ struct net *net = seq_file_net(seq);
h = h->next;
while (!h) {
+ struct nfnl_queue_net *q;
+
if (++st->bucket >= INSTANCE_BUCKETS)
return NULL;
- h = instance_table[st->bucket].first;
+ q = nfnl_queue_pernet(net);
+ h = q->instance_table[st->bucket].first;
}
return h;
}
@@ -992,11 +1211,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
return pos ? NULL : head;
}
-static void *seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(instances_lock)
+static void *seq_start(struct seq_file *s, loff_t *pos)
+ __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
{
- spin_lock(&instances_lock);
- return get_idx(seq, *pos);
+ spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
+ return get_idx(s, *pos);
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
@@ -1006,9 +1225,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void seq_stop(struct seq_file *s, void *v)
- __releases(instances_lock)
+ __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
{
- spin_unlock(&instances_lock);
+ spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
}
static int seq_show(struct seq_file *s, void *v)
@@ -1032,7 +1251,7 @@ static const struct seq_operations nfqnl_seq_ops = {
static int nfqnl_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &nfqnl_seq_ops,
+ return seq_open_net(inode, file, &nfqnl_seq_ops,
sizeof(struct iter_state));
}
@@ -1041,41 +1260,65 @@ static const struct file_operations nfqnl_file_ops = {
.open = nfqnl_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif /* PROC_FS */
-static int __init nfnetlink_queue_init(void)
+static int __net_init nfnl_queue_net_init(struct net *net)
{
- int i, status = -ENOMEM;
+ unsigned int i;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
for (i = 0; i < INSTANCE_BUCKETS; i++)
- INIT_HLIST_HEAD(&instance_table[i]);
+ INIT_HLIST_HEAD(&q->instance_table[i]);
+
+ spin_lock_init(&q->instances_lock);
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create("nfnetlink_queue", 0440,
+ net->nf.proc_netfilter, &nfqnl_file_ops))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+static void __net_exit nfnl_queue_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
+#endif
+}
+
+static struct pernet_operations nfnl_queue_net_ops = {
+ .init = nfnl_queue_net_init,
+ .exit = nfnl_queue_net_exit,
+ .id = &nfnl_queue_net_id,
+ .size = sizeof(struct nfnl_queue_net),
+};
+
+static int __init nfnetlink_queue_init(void)
+{
+ int status = -ENOMEM;
netlink_register_notifier(&nfqnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfqnl_subsys);
if (status < 0) {
- printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
+ pr_err("nf_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
-#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_queue", 0440,
- proc_net_netfilter, &nfqnl_file_ops)) {
- status = -ENOMEM;
+ status = register_pernet_subsys(&nfnl_queue_net_ops);
+ if (status < 0) {
+ pr_err("nf_queue: failed to register pernet ops\n");
goto cleanup_subsys;
}
-#endif
-
register_netdevice_notifier(&nfqnl_dev_notifier);
nf_register_queue_handler(&nfqh);
return status;
-#ifdef CONFIG_PROC_FS
cleanup_subsys:
nfnetlink_subsys_unregister(&nfqnl_subsys);
-#endif
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
return status;
@@ -1085,9 +1328,7 @@ static void __exit nfnetlink_queue_fini(void)
{
nf_unregister_queue_handler();
unregister_netdevice_notifier(&nfqnl_dev_notifier);
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
-#endif
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
nfnetlink_subsys_unregister(&nfqnl_subsys);
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 686c7715d777..8b03028cca69 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -2,6 +2,7 @@
* x_tables core - Backend for {ip,ip6,arp}_tables
*
* Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
+ * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* Based on existing ip_tables code which is
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
@@ -999,7 +1000,7 @@ static int xt_table_open(struct inode *inode, struct file *file)
sizeof(struct xt_names_priv));
if (!ret) {
priv = ((struct seq_file *)file->private_data)->private;
- priv->af = (unsigned long)PDE(inode)->data;
+ priv->af = (unsigned long)PDE_DATA(inode);
}
return ret;
}
@@ -1147,7 +1148,7 @@ static int xt_match_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = trav;
- trav->nfproto = (unsigned long)PDE(inode)->data;
+ trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
@@ -1211,7 +1212,7 @@ static int xt_target_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = trav;
- trav->nfproto = (unsigned long)PDE(inode)->data;
+ trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index fa40096940a1..5ab24843370a 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -466,7 +466,8 @@ log_packet_common(struct sbuff *m,
static void
-ipt_log_packet(u_int8_t pf,
+ipt_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -474,7 +475,13 @@ ipt_log_packet(u_int8_t pf,
const struct nf_loginfo *loginfo,
const char *prefix)
{
- struct sbuff *m = sb_open();
+ struct sbuff *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = sb_open();
if (!loginfo)
loginfo = &default_loginfo;
@@ -730,7 +737,7 @@ static void dump_ipv6_packet(struct sbuff *m,
dump_sk_uid_gid(m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
- if (!recurse && skb->mark)
+ if (recurse && skb->mark)
sb_add(m, "MARK=0x%x ", skb->mark);
}
@@ -790,7 +797,8 @@ fallback:
}
static void
-ip6t_log_packet(u_int8_t pf,
+ip6t_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -798,7 +806,13 @@ ip6t_log_packet(u_int8_t pf,
const struct nf_loginfo *loginfo,
const char *prefix)
{
- struct sbuff *m = sb_open();
+ struct sbuff *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = sb_open();
if (!loginfo)
loginfo = &default_loginfo;
@@ -819,17 +833,18 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
if (par->family == NFPROTO_IPV4)
- ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in,
+ ipt_log_packet(net, NFPROTO_IPV4, par->hooknum, skb, par->in,
par->out, &li, loginfo->prefix);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
- ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in,
+ ip6t_log_packet(net, NFPROTO_IPV6, par->hooknum, skb, par->in,
par->out, &li, loginfo->prefix);
#endif
else
@@ -893,23 +908,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = {
};
#endif
+static int __net_init log_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger);
+#endif
+ return 0;
+}
+
+static void __net_exit log_net_exit(struct net *net)
+{
+ nf_log_unset(net, &ipt_log_logger);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ nf_log_unset(net, &ip6t_log_logger);
+#endif
+}
+
+static struct pernet_operations log_net_ops = {
+ .init = log_net_init,
+ .exit = log_net_exit,
+};
+
static int __init log_tg_init(void)
{
int ret;
+ ret = register_pernet_subsys(&log_net_ops);
+ if (ret < 0)
+ goto err_pernet;
+
ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
if (ret < 0)
- return ret;
+ goto err_target;
nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);
#endif
return 0;
+
+err_target:
+ unregister_pernet_subsys(&log_net_ops);
+err_pernet:
+ return ret;
}
static void __exit log_tg_exit(void)
{
+ unregister_pernet_subsys(&log_net_ops);
nf_log_unregister(&ipt_log_logger);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
nf_log_unregister(&ip6t_log_logger);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a17dd0f589b2..fb7497c928a0 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -26,13 +26,14 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
- nfulnl_log_packet(par->family, par->hooknum, skb, par->in,
+ nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
par->out, &li, info->prefix);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 817f9e9f2b16..1e2fae32f81b 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb)
}
#endif
-static unsigned int
-nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+static u32
+nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_NFQ_info_v1 *info = par->targinfo;
u32 queue = info->queuenum;
- if (info->queues_total > 1) {
- if (par->family == NFPROTO_IPV4)
- queue = (((u64) hash_v4(skb) * info->queues_total) >>
- 32) + queue;
+ if (par->family == NFPROTO_IPV4)
+ queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- else if (par->family == NFPROTO_IPV6)
- queue = (((u64) hash_v6(skb) * info->queues_total) >>
- 32) + queue;
+ else if (par->family == NFPROTO_IPV6)
+ queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;
#endif
- }
+
+ return queue;
+}
+
+static unsigned int
+nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v1 *info = par->targinfo;
+ u32 queue = info->queuenum;
+
+ if (info->queues_total > 1)
+ queue = nfqueue_hash(skb, par);
+
return NF_QUEUE_NR(queue);
}
@@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
static int nfqueue_tg_check(const struct xt_tgchk_param *par)
{
- const struct xt_NFQ_info_v2 *info = par->targinfo;
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
u32 maxid;
if (unlikely(!rnd_inited)) {
@@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
info->queues_total, maxid);
return -ERANGE;
}
- if (par->target->revision == 2 && info->bypass > 1)
+ if (par->target->revision == 2 && info->flags > 1)
return -EINVAL;
+ if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
+ return -EINVAL;
+
return 0;
}
+static unsigned int
+nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
+ u32 queue = info->queuenum;
+
+ if (info->queues_total > 1) {
+ if (info->flags & NFQ_FLAG_CPU_FANOUT) {
+ int cpu = smp_processor_id();
+
+ queue = info->queuenum + cpu % info->queues_total;
+ } else
+ queue = nfqueue_hash(skb, par);
+ }
+
+ return NF_QUEUE_NR(queue);
+}
+
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
{
.name = "NFQUEUE",
@@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_NFQ_info_v2),
.me = THIS_MODULE,
},
+ {
+ .name = "NFQUEUE",
+ .revision = 3,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v3,
+ .targetsize = sizeof(struct xt_NFQ_info_v3),
+ .me = THIS_MODULE,
+ },
};
static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 71a266de5fb4..7011c71646f0 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -2,6 +2,7 @@
* This is a module which is used for setting the MSS option in TCP packets.
*
* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
+ * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -44,17 +45,22 @@ optlen(const u_int8_t *opt, unsigned int offset)
static int
tcpmss_mangle_packet(struct sk_buff *skb,
- const struct xt_tcpmss_info *info,
+ const struct xt_action_param *par,
unsigned int in_mtu,
unsigned int tcphoff,
unsigned int minlen)
{
+ const struct xt_tcpmss_info *info = par->targinfo;
struct tcphdr *tcph;
unsigned int tcplen, i;
__be16 oldval;
u16 newmss;
u8 *opt;
+ /* This is a fragment, no TCP header is available */
+ if (par->fragoff != 0)
+ return XT_CONTINUE;
+
if (!skb_make_writable(skb, skb->len))
return -1;
@@ -124,6 +130,18 @@ tcpmss_mangle_packet(struct sk_buff *skb,
skb_put(skb, TCPOLEN_MSS);
+ /*
+ * IPv4: RFC 1122 states "If an MSS option is not received at
+ * connection setup, TCP MUST assume a default send MSS of 536".
+ * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum
+ * length IPv6 header of 60, ergo the default MSS value is 1220
+ * Since no MSS was provided, we must use the default values
+ */
+ if (par->family == NFPROTO_IPV4)
+ newmss = min(newmss, (u16)536);
+ else
+ newmss = min(newmss, (u16)1220);
+
opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
@@ -181,7 +199,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
__be16 newlen;
int ret;
- ret = tcpmss_mangle_packet(skb, par->targinfo,
+ ret = tcpmss_mangle_packet(skb, par,
tcpmss_reverse_mtu(skb, PF_INET),
iph->ihl * 4,
sizeof(*iph) + sizeof(struct tcphdr));
@@ -210,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
if (tcphoff < 0)
return NF_DROP;
- ret = tcpmss_mangle_packet(skb, par->targinfo,
+ ret = tcpmss_mangle_packet(skb, par,
tcpmss_reverse_mtu(skb, PF_INET6),
tcphoff,
sizeof(*ipv6h) + sizeof(struct tcphdr));
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 25fd1c4e1eec..b68fa191710f 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -30,18 +30,31 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
static unsigned int
tcpoptstrip_mangle_packet(struct sk_buff *skb,
- const struct xt_tcpoptstrip_target_info *info,
+ const struct xt_action_param *par,
unsigned int tcphoff, unsigned int minlen)
{
+ const struct xt_tcpoptstrip_target_info *info = par->targinfo;
unsigned int optl, i, j;
struct tcphdr *tcph;
u_int16_t n, o;
u_int8_t *opt;
+ int len;
+
+ /* This is a fragment, no TCP header is available */
+ if (par->fragoff != 0)
+ return XT_CONTINUE;
if (!skb_make_writable(skb, skb->len))
return NF_DROP;
+ len = skb->len - tcphoff;
+ if (len < (int)sizeof(struct tcphdr))
+ return NF_DROP;
+
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+ if (tcph->doff * 4 > len)
+ return NF_DROP;
+
opt = (u_int8_t *)tcph;
/*
@@ -76,7 +89,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
static unsigned int
tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
- return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb),
+ return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb),
sizeof(struct iphdr) + sizeof(struct tcphdr));
}
@@ -94,7 +107,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (tcphoff < 0)
return NF_DROP;
- return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff,
+ return tcpoptstrip_mangle_packet(skb, par, tcphoff,
sizeof(*ipv6h) + sizeof(struct tcphdr));
}
#endif
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 49c5ff7f6dd6..68ff29f60867 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -22,6 +22,7 @@
#include <net/ip6_fib.h>
#endif
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/xt_addrtype.h>
#include <linux/netfilter/x_tables.h>
@@ -33,12 +34,12 @@ MODULE_ALIAS("ip6t_addrtype");
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
- const struct in6_addr *addr)
+ const struct in6_addr *addr, u16 mask)
{
const struct nf_afinfo *afinfo;
struct flowi6 flow;
struct rt6_info *rt;
- u32 ret;
+ u32 ret = 0;
int route_err;
memset(&flow, 0, sizeof(flow));
@@ -49,12 +50,19 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
rcu_read_lock();
afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (afinfo != NULL)
+ if (afinfo != NULL) {
+ const struct nf_ipv6_ops *v6ops;
+
+ if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+ v6ops = nf_get_ipv6_ops();
+ if (v6ops && v6ops->chk_addr(net, addr, dev, true))
+ ret = XT_ADDRTYPE_LOCAL;
+ }
route_err = afinfo->route(net, (struct dst_entry **)&rt,
- flowi6_to_flowi(&flow), !!dev);
- else
+ flowi6_to_flowi(&flow), false);
+ } else {
route_err = 1;
-
+ }
rcu_read_unlock();
if (route_err)
@@ -62,15 +70,12 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (rt->rt6i_flags & RTF_REJECT)
ret = XT_ADDRTYPE_UNREACHABLE;
- else
- ret = 0;
- if (rt->rt6i_flags & RTF_LOCAL)
+ if (dev == NULL && rt->rt6i_flags & RTF_LOCAL)
ret |= XT_ADDRTYPE_LOCAL;
if (rt->rt6i_flags & RTF_ANYCAST)
ret |= XT_ADDRTYPE_ANYCAST;
-
dst_release(&rt->dst);
return ret;
}
@@ -90,7 +95,7 @@ static bool match_type6(struct net *net, const struct net_device *dev,
if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
XT_ADDRTYPE_UNREACHABLE) & mask)
- return !!(mask & match_lookup_rt6(net, dev, addr));
+ return !!(mask & match_lookup_rt6(net, dev, addr, mask));
return true;
}
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 61805d7b38aa..188404b9b002 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -3,6 +3,7 @@
* information. (Superset of Rusty's minimalistic state match.)
*
* (C) 2001 Marc Boucher (marc@mbsi.ca).
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
*
* This program is free software; you can redistribute it and/or modify
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index f330e8beaf69..9ff035c71403 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -3,6 +3,7 @@
* separately for each hashbucket (sourceip/sourceport/dstip/dstport)
*
* (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
*
* Development of this code was funded by Astaro AG, http://www.astaro.com/
@@ -107,6 +108,7 @@ struct xt_hashlimit_htable {
/* seq_file stuff */
struct proc_dir_entry *pde;
+ const char *name;
struct net *net;
struct hlist_head hash[0]; /* hashtable itself */
@@ -253,6 +255,11 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
hinfo->count = 0;
hinfo->family = family;
hinfo->rnd_initialized = false;
+ hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
+ if (!hinfo->name) {
+ vfree(hinfo);
+ return -ENOMEM;
+ }
spin_lock_init(&hinfo->lock);
hinfo->pde = proc_create_data(minfo->name, 0,
@@ -260,6 +267,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
&dl_file_ops, hinfo);
if (hinfo->pde == NULL) {
+ kfree(hinfo->name);
vfree(hinfo);
return -ENOMEM;
}
@@ -330,9 +338,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
parent = hashlimit_net->ip6t_hashlimit;
if(parent != NULL)
- remove_proc_entry(hinfo->pde->name, parent);
+ remove_proc_entry(hinfo->name, parent);
htable_selective_cleanup(hinfo, select_all);
+ kfree(hinfo->name);
vfree(hinfo);
}
@@ -344,7 +353,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
struct xt_hashlimit_htable *hinfo;
hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
- if (!strcmp(name, hinfo->pde->name) &&
+ if (!strcmp(name, hinfo->name) &&
hinfo->family == family) {
hinfo->use++;
return hinfo;
@@ -841,7 +850,7 @@ static int dl_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- sf->private = PDE(inode)->data;
+ sf->private = PDE_DATA(inode);
}
return ret;
}
@@ -887,7 +896,7 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net)
pde = hashlimit_net->ip6t_hashlimit;
hlist_for_each_entry(hinfo, &hashlimit_net->htables, node)
- remove_proc_entry(hinfo->pde->name, pde);
+ remove_proc_entry(hinfo->name, pde);
hashlimit_net->ipt_hashlimit = NULL;
hashlimit_net->ip6t_hashlimit = NULL;
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index a4c1e4528cac..bef850596558 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,5 +1,6 @@
/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
* (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index a5e673d32bda..647d989a01e6 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
unsigned char opts[MAX_IPOPTLEN];
const struct xt_osf_finger *kf;
const struct xt_osf_user_finger *f;
+ struct net *net = dev_net(p->in ? p->in : p->out);
if (!info)
return false;
@@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
fcount++;
if (info->flags & XT_OSF_LOG)
- nf_log_packet(p->family, p->hooknum, skb,
+ nf_log_packet(net, p->family, p->hooknum, skb,
p->in, p->out, NULL,
"%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
f->genre, f->version, f->subtype,
@@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
- nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL,
+ nf_log_packet(net, p->family, p->hooknum, skb, p->in,
+ p->out, NULL,
"Remote OS is not known: %pI4:%u -> %pI4:%u\n",
&ip->saddr, ntohs(tcp->source),
&ip->daddr, ntohs(tcp->dest));
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index d9cad315229d..1e657cf715c4 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -401,8 +401,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
ret = -ENOMEM;
goto out;
}
- pde->uid = uid;
- pde->gid = gid;
+ proc_set_user(pde, uid, gid);
#endif
spin_lock_bh(&recent_lock);
list_add_tail(&t->list, &recent_net->tables);
@@ -525,14 +524,13 @@ static const struct seq_operations recent_seq_ops = {
static int recent_seq_open(struct inode *inode, struct file *file)
{
- struct proc_dir_entry *pde = PDE(inode);
struct recent_iter_state *st;
st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
if (st == NULL)
return -ENOMEM;
- st->table = pde->data;
+ st->table = PDE_DATA(inode);
return 0;
}
@@ -540,8 +538,7 @@ static ssize_t
recent_mt_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *loff)
{
- const struct proc_dir_entry *pde = PDE(file_inode(file));
- struct recent_table *t = pde->data;
+ struct recent_table *t = PDE_DATA(file_inode(file));
struct recent_entry *e;
char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
const char *c = buf;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 865a9e54f3ad..31790e789e22 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -1,7 +1,7 @@
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
* Patrick Schaaf <bof@bof.de>
* Martin Josefsson <gandalf@wlug.westbo.se>
- * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SET");
static inline int
match_set(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par,
- const struct ip_set_adt_opt *opt, int inv)
+ struct ip_set_adt_opt *opt, int inv)
{
if (ip_set_test(index, skb, par, opt))
inv = !inv;
@@ -38,20 +38,12 @@ match_set(ip_set_id_t index, const struct sk_buff *skb,
}
#define ADT_OPT(n, f, d, fs, cfs, t) \
-const struct ip_set_adt_opt n = { \
- .family = f, \
- .dim = d, \
- .flags = fs, \
- .cmdflags = cfs, \
- .timeout = t, \
-}
-#define ADT_MOPT(n, f, d, fs, cfs, t) \
struct ip_set_adt_opt n = { \
.family = f, \
.dim = d, \
.flags = fs, \
.cmdflags = cfs, \
- .timeout = t, \
+ .ext.timeout = t, \
}
/* Revision 0 interface: backward compatible with netfilter/iptables */
@@ -197,6 +189,9 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
ADT_OPT(opt, par->family, info->match_set.dim,
info->match_set.flags, 0, UINT_MAX);
+ if (opt.flags & IPSET_RETURN_NOMATCH)
+ opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
+
return match_set(info->match_set.index, skb, par, &opt,
info->match_set.flags & IPSET_INV_MATCH);
}
@@ -305,15 +300,15 @@ static unsigned int
set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v2 *info = par->targinfo;
- ADT_MOPT(add_opt, par->family, info->add_set.dim,
- info->add_set.flags, info->flags, info->timeout);
+ ADT_OPT(add_opt, par->family, info->add_set.dim,
+ info->add_set.flags, info->flags, info->timeout);
ADT_OPT(del_opt, par->family, info->del_set.dim,
info->del_set.flags, 0, UINT_MAX);
/* Normalize to fit into jiffies */
- if (add_opt.timeout != IPSET_NO_TIMEOUT &&
- add_opt.timeout > UINT_MAX/MSEC_PER_SEC)
- add_opt.timeout = UINT_MAX/MSEC_PER_SEC;
+ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+ add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -325,6 +320,52 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
#define set_target_v2_checkentry set_target_v1_checkentry
#define set_target_v2_destroy set_target_v1_destroy
+/* Revision 3 match */
+
+/* Evaluate one counter condition: apply the comparison selected by info->op
+ * to @counter and info->value.  IPSET_COUNTER_NONE always matches; an
+ * operator not listed below never matches.
+ */
+static bool
+match_counter(u64 counter, const struct ip_set_counter_match *info)
+{
+	bool hit = false;
+
+	switch (info->op) {
+	case IPSET_COUNTER_NONE:
+		hit = true;
+		break;
+	case IPSET_COUNTER_EQ:
+		hit = (counter == info->value);
+		break;
+	case IPSET_COUNTER_NE:
+		hit = (counter != info->value);
+		break;
+	case IPSET_COUNTER_LT:
+		hit = (counter < info->value);
+		break;
+	case IPSET_COUNTER_GT:
+		hit = (counter > info->value);
+		break;
+	default:
+		break;
+	}
+
+	return hit;
+}
+
+/* Revision 3 "set" match: test the packet against the set and, when the rule
+ * carries a packet or byte counter condition, additionally require the
+ * counters reported back in opt.ext to satisfy it.
+ */
+static bool
+set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_set_info_match_v3 *info = par->matchinfo;
+	ADT_OPT(opt, par->family, info->match_set.dim,
+		info->match_set.flags, info->flags, UINT_MAX);
+	int matched;
+
+	/* Ask for counters only if at least one condition will look at them */
+	if (info->packets.op != IPSET_COUNTER_NONE ||
+	    info->bytes.op != IPSET_COUNTER_NONE)
+		opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
+
+	matched = match_set(info->match_set.index, skb, par, &opt,
+			    info->match_set.flags & IPSET_INV_MATCH);
+
+	/* No set match, or no counter matching requested: verdict is final */
+	if (!matched || !(opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
+		return matched;
+
+	return match_counter(opt.ext.packets, &info->packets) &&
+	       match_counter(opt.ext.bytes, &info->bytes);
+}
+
+#define set_match_v3_checkentry set_match_v1_checkentry
+#define set_match_v3_destroy set_match_v1_destroy
+
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",
@@ -377,6 +418,27 @@ static struct xt_match set_matches[] __read_mostly = {
.destroy = set_match_v1_destroy,
.me = THIS_MODULE
},
+ /* counters support: update, match */
+ {
+ .name = "set",
+ .family = NFPROTO_IPV4,
+ .revision = 3,
+ .match = set_match_v3,
+ .matchsize = sizeof(struct xt_set_info_match_v3),
+ .checkentry = set_match_v3_checkentry,
+ .destroy = set_match_v3_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "set",
+ .family = NFPROTO_IPV6,
+ .revision = 3,
+ .match = set_match_v3,
+ .matchsize = sizeof(struct xt_set_info_match_v3),
+ .checkentry = set_match_v3_checkentry,
+ .destroy = set_match_v3_destroy,
+ .me = THIS_MODULE
+ },
};
static struct xt_target set_targets[] __read_mostly = {
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index d8d424337550..6bb1d42f0fac 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -245,6 +245,71 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
}
}
+/**
+ * netlbl_domhsh_validate - Validate a new domain mapping entry
+ * @entry: the entry to validate
+ *
+ * This function validates the new domain mapping entry to ensure that it is
+ * a valid entry.  An entry is rejected when its type is unknown, when the
+ * type-specific definition pointer does not agree with the type, or when any
+ * per-address mapping of an address selector entry is itself inconsistent.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry)
+{
+	struct netlbl_af4list *iter4;
+	struct netlbl_domaddr4_map *map4;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct netlbl_af6list *iter6;
+	struct netlbl_domaddr6_map *map6;
+#endif /* IPv6 */
+
+	if (entry == NULL)
+		return -EINVAL;
+
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		if (entry->type_def.cipsov4 != NULL ||
+		    entry->type_def.addrsel != NULL)
+			return -EINVAL;
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		if (entry->type_def.cipsov4 == NULL)
+			return -EINVAL;
+		break;
+	case NETLBL_NLTYPE_ADDRSELECT:
+		/* the address lists below are reached through addrsel, so it
+		 * must be present before they can be walked */
+		if (entry->type_def.addrsel == NULL)
+			return -EINVAL;
+		netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) {
+			map4 = netlbl_domhsh_addr4_entry(iter4);
+			switch (map4->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				if (map4->type_def.cipsov4 != NULL)
+					return -EINVAL;
+				break;
+			case NETLBL_NLTYPE_CIPSOV4:
+				if (map4->type_def.cipsov4 == NULL)
+					return -EINVAL;
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+		netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) {
+			map6 = netlbl_domhsh_addr6_entry(iter6);
+			/* only unlabeled mappings are accepted for IPv6 */
+			switch (map6->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+#endif /* IPv6 */
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
/*
* Domain Hash Table Functions
*/
@@ -311,6 +376,10 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
struct netlbl_af6list *tmp6;
#endif /* IPv6 */
+ ret_val = netlbl_domhsh_validate(entry);
+ if (ret_val != 0)
+ return ret_val;
+
/* XXX - we can remove this RCU read lock as the spinlock protects the
* entire function, but before we do we need to fixup the
* netlbl_af[4,6]list RCU functions to do "the right thing" with
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
new file mode 100644
index 000000000000..2c5e95e9bfbd
--- /dev/null
+++ b/net/netlink/Kconfig
@@ -0,0 +1,19 @@
+#
+# Netlink Sockets
+#
+
+config NETLINK_MMAP
+ bool "NETLINK: mmaped IO"
+ ---help---
+ This option enables support for memory mapped netlink IO. This
+ reduces overhead by avoiding copying data between kernel- and
+ userspace.
+
+ If unsure, say N.
+
+config NETLINK_DIAG
+ tristate "NETLINK: socket monitoring interface"
+ default n
+ ---help---
+ Support for NETLINK socket monitoring interface used by the ss tool.
+ If unsure, say Y.
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index bdd6ddf4e95b..e837917f6c03 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -3,3 +3,6 @@
#
obj-y := af_netlink.o genetlink.o
+
+obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o
+netlink_diag-y := diag.o
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1e3fd5bfcd86..57ee84d21470 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3,6 +3,7 @@
*
* Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ * Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -55,87 +56,45 @@
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
-#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
-#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
-
-struct netlink_sock {
- /* struct sock has to be the first member of netlink_sock */
- struct sock sk;
- u32 portid;
- u32 dst_portid;
- u32 dst_group;
- u32 flags;
- u32 subscriptions;
- u32 ngroups;
- unsigned long *groups;
- unsigned long state;
- wait_queue_head_t wait;
- struct netlink_callback *cb;
- struct mutex *cb_mutex;
- struct mutex cb_def_mutex;
- void (*netlink_rcv)(struct sk_buff *skb);
- void (*netlink_bind)(int group);
- struct module *module;
-};
+#include "af_netlink.h"
struct listeners {
struct rcu_head rcu;
unsigned long masks[0];
};
+/* state bits */
+#define NETLINK_CONGESTED 0x0
+
+/* flags */
#define NETLINK_KERNEL_SOCKET 0x1
#define NETLINK_RECV_PKTINFO 0x2
#define NETLINK_BROADCAST_SEND_ERROR 0x4
#define NETLINK_RECV_NO_ENOBUFS 0x8
-static inline struct netlink_sock *nlk_sk(struct sock *sk)
-{
- return container_of(sk, struct netlink_sock, sk);
-}
-
static inline int netlink_is_kernel(struct sock *sk)
{
return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}
-struct nl_portid_hash {
- struct hlist_head *table;
- unsigned long rehash_time;
-
- unsigned int mask;
- unsigned int shift;
-
- unsigned int entries;
- unsigned int max_shift;
-
- u32 rnd;
-};
-
-struct netlink_table {
- struct nl_portid_hash hash;
- struct hlist_head mc_list;
- struct listeners __rcu *listeners;
- unsigned int flags;
- unsigned int groups;
- struct mutex *cb_mutex;
- struct module *module;
- void (*bind)(int group);
- int registered;
-};
-
-static struct netlink_table *nl_table;
+struct netlink_table *nl_table;
+EXPORT_SYMBOL_GPL(nl_table);
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
static int netlink_dump(struct sock *sk);
+static void netlink_skb_destructor(struct sk_buff *skb);
-static DEFINE_RWLOCK(nl_table_lock);
+DEFINE_RWLOCK(nl_table_lock);
+EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
@@ -152,6 +111,599 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
}
+static void netlink_overrun(struct sock *sk)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
+ if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
+ sk->sk_err = ENOBUFS;
+ sk->sk_error_report(sk);
+ }
+ }
+ atomic_inc(&sk->sk_drops);
+}
+
+static void netlink_rcv_wake(struct sock *sk)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (skb_queue_empty(&sk->sk_receive_queue))
+ clear_bit(NETLINK_CONGESTED, &nlk->state);
+ if (!test_bit(NETLINK_CONGESTED, &nlk->state))
+ wake_up_interruptible(&nlk->wait);
+}
+
+#ifdef CONFIG_NETLINK_MMAP
+static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+{
+ return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+}
+
+static bool netlink_rx_is_mmaped(struct sock *sk)
+{
+ return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+}
+
+static bool netlink_tx_is_mmaped(struct sock *sk)
+{
+ return nlk_sk(sk)->tx_ring.pg_vec != NULL;
+}
+
+static __pure struct page *pgvec_to_page(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ return vmalloc_to_page(addr);
+ else
+ return virt_to_page(addr);
+}
+
+static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; i < len; i++) {
+ if (pg_vec[i] != NULL) {
+ if (is_vmalloc_addr(pg_vec[i]))
+ vfree(pg_vec[i]);
+ else
+ free_pages((unsigned long)pg_vec[i], order);
+ }
+ }
+ kfree(pg_vec);
+}
+
+static void *alloc_one_pg_vec_page(unsigned long order)
+{
+ void *buffer;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
+ __GFP_NOWARN | __GFP_NORETRY;
+
+ buffer = (void *)__get_free_pages(gfp_flags, order);
+ if (buffer != NULL)
+ return buffer;
+
+ buffer = vzalloc((1 << order) * PAGE_SIZE);
+ if (buffer != NULL)
+ return buffer;
+
+ gfp_flags &= ~__GFP_NORETRY;
+ return (void *)__get_free_pages(gfp_flags, order);
+}
+
+/* Allocate the block vector for a ring: an array of req->nm_block_nr
+ * pointers, each filled in by alloc_one_pg_vec_page(@order).  On any failure
+ * everything allocated so far is released and NULL is returned.
+ * @nlk is not used by the body.
+ */
+static void **alloc_pg_vec(struct netlink_sock *nlk,
+			   struct nl_mmap_req *req, unsigned int order)
+{
+	unsigned int block_nr = req->nm_block_nr;
+	unsigned int i;
+	void **pg_vec;
+
+	/* kcalloc() zeroes the array, so free_pg_vec() can skip the slots
+	 * that were never filled when we bail out half way through */
+	pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
+	if (pg_vec == NULL)
+		return NULL;
+
+	for (i = 0; i < block_nr; i++) {
+		pg_vec[i] = alloc_one_pg_vec_page(order);
+		if (pg_vec[i] == NULL)
+			goto err1;
+	}
+
+	return pg_vec;
+err1:
+	free_pg_vec(pg_vec, order, block_nr);
+	return NULL;
+}
+
+static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
+ bool closing, bool tx_ring)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct netlink_ring *ring;
+ struct sk_buff_head *queue;
+ void **pg_vec = NULL;
+ unsigned int order = 0;
+ int err;
+
+ ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+ queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+
+ if (!closing) {
+ if (atomic_read(&nlk->mapped))
+ return -EBUSY;
+ if (atomic_read(&ring->pending))
+ return -EBUSY;
+ }
+
+ if (req->nm_block_nr) {
+ if (ring->pg_vec != NULL)
+ return -EBUSY;
+
+ if ((int)req->nm_block_size <= 0)
+ return -EINVAL;
+ if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
+ return -EINVAL;
+ if (req->nm_frame_size < NL_MMAP_HDRLEN)
+ return -EINVAL;
+ if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
+ return -EINVAL;
+
+ ring->frames_per_block = req->nm_block_size /
+ req->nm_frame_size;
+ if (ring->frames_per_block == 0)
+ return -EINVAL;
+ if (ring->frames_per_block * req->nm_block_nr !=
+ req->nm_frame_nr)
+ return -EINVAL;
+
+ order = get_order(req->nm_block_size);
+ pg_vec = alloc_pg_vec(nlk, req, order);
+ if (pg_vec == NULL)
+ return -ENOMEM;
+ } else {
+ if (req->nm_frame_nr)
+ return -EINVAL;
+ }
+
+ err = -EBUSY;
+ mutex_lock(&nlk->pg_vec_lock);
+ if (closing || atomic_read(&nlk->mapped) == 0) {
+ err = 0;
+ spin_lock_bh(&queue->lock);
+
+ ring->frame_max = req->nm_frame_nr - 1;
+ ring->head = 0;
+ ring->frame_size = req->nm_frame_size;
+ ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+ swap(ring->pg_vec_len, req->nm_block_nr);
+ swap(ring->pg_vec_order, order);
+ swap(ring->pg_vec, pg_vec);
+
+ __skb_queue_purge(queue);
+ spin_unlock_bh(&queue->lock);
+
+ WARN_ON(atomic_read(&nlk->mapped));
+ }
+ mutex_unlock(&nlk->pg_vec_lock);
+
+ if (pg_vec)
+ free_pg_vec(pg_vec, order, req->nm_block_nr);
+ return err;
+}
+
+static void netlink_mm_open(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct socket *sock = file->private_data;
+ struct sock *sk = sock->sk;
+
+ if (sk)
+ atomic_inc(&nlk_sk(sk)->mapped);
+}
+
+static void netlink_mm_close(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct socket *sock = file->private_data;
+ struct sock *sk = sock->sk;
+
+ if (sk)
+ atomic_dec(&nlk_sk(sk)->mapped);
+}
+
+static const struct vm_operations_struct netlink_mmap_ops = {
+ .open = netlink_mm_open,
+ .close = netlink_mm_close,
+};
+
+static int netlink_mmap(struct file *file, struct socket *sock,
+ struct vm_area_struct *vma)
+{
+ struct sock *sk = sock->sk;
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct netlink_ring *ring;
+ unsigned long start, size, expected;
+ unsigned int i;
+ int err = -EINVAL;
+
+ if (vma->vm_pgoff)
+ return -EINVAL;
+
+ mutex_lock(&nlk->pg_vec_lock);
+
+ expected = 0;
+ for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+ if (ring->pg_vec == NULL)
+ continue;
+ expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
+ }
+
+ if (expected == 0)
+ goto out;
+
+ size = vma->vm_end - vma->vm_start;
+ if (size != expected)
+ goto out;
+
+ start = vma->vm_start;
+ for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+ if (ring->pg_vec == NULL)
+ continue;
+
+ for (i = 0; i < ring->pg_vec_len; i++) {
+ struct page *page;
+ void *kaddr = ring->pg_vec[i];
+ unsigned int pg_num;
+
+ for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
+ page = pgvec_to_page(kaddr);
+ err = vm_insert_page(vma, start, page);
+ if (err < 0)
+ goto out;
+ start += PAGE_SIZE;
+ kaddr += PAGE_SIZE;
+ }
+ }
+ }
+
+ atomic_inc(&nlk->mapped);
+ vma->vm_ops = &netlink_mmap_ops;
+ err = 0;
+out:
+ mutex_unlock(&nlk->pg_vec_lock);
+ return err;
+}
+
+/* Flush the data cache for every page of a frame except the first one.
+ * The frame spans the header plus hdr->nm_len bytes of payload.  Compiled
+ * out unless ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is 1.
+ */
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+{
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+	const void *last = (const void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1;
+	const void *addr;
+
+	/* First page is flushed through netlink_{get,set}_status.
+	 *
+	 * Walk byte addresses, one page at a time, starting at the page that
+	 * follows the one holding the header.  "hdr + PAGE_SIZE" would scale
+	 * the offset by sizeof(*hdr), and only addresses up to the last byte
+	 * of the frame are ever translated to a page.
+	 */
+	addr = (const void *)(((unsigned long)hdr & PAGE_MASK) + PAGE_SIZE);
+	for (; addr <= last; addr += PAGE_SIZE)
+		flush_dcache_page(pgvec_to_page(addr));
+#endif
+}
+
+static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
+{
+ smp_rmb();
+ flush_dcache_page(pgvec_to_page(hdr));
+ return hdr->nm_status;
+}
+
+static void netlink_set_status(struct nl_mmap_hdr *hdr,
+ enum nl_mmap_status status)
+{
+ hdr->nm_status = status;
+ flush_dcache_page(pgvec_to_page(hdr));
+ smp_wmb();
+}
+
+static struct nl_mmap_hdr *
+__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
+{
+ unsigned int pg_vec_pos, frame_off;
+
+ pg_vec_pos = pos / ring->frames_per_block;
+ frame_off = pos % ring->frames_per_block;
+
+ return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
+}
+
+static struct nl_mmap_hdr *
+netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
+ enum nl_mmap_status status)
+{
+ struct nl_mmap_hdr *hdr;
+
+ hdr = __netlink_lookup_frame(ring, pos);
+ if (netlink_get_status(hdr) != status)
+ return NULL;
+
+ return hdr;
+}
+
+static struct nl_mmap_hdr *
+netlink_current_frame(const struct netlink_ring *ring,
+ enum nl_mmap_status status)
+{
+ return netlink_lookup_frame(ring, ring->head, status);
+}
+
+static struct nl_mmap_hdr *
+netlink_previous_frame(const struct netlink_ring *ring,
+ enum nl_mmap_status status)
+{
+ unsigned int prev;
+
+ prev = ring->head ? ring->head - 1 : ring->frame_max;
+ return netlink_lookup_frame(ring, prev, status);
+}
+
+static void netlink_increment_head(struct netlink_ring *ring)
+{
+ ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
+}
+
+/* Advance ring->head past frames marked NL_MMAP_STATUS_SKIP.  Stops at the
+ * first frame with any other status, or after one full lap of the ring.
+ */
+static void netlink_forward_ring(struct netlink_ring *ring)
+{
+	unsigned int head = ring->head;
+	const struct nl_mmap_hdr *hdr;
+
+	do {
+		/* always inspect the frame the head currently points at;
+		 * looking up a fixed position would re-test the same frame
+		 * on every iteration while the head keeps moving */
+		hdr = __netlink_lookup_frame(ring, ring->head);
+		if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
+			break;
+		if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
+			break;
+		netlink_increment_head(ring);
+	} while (ring->head != head);
+}
+
+/* Decide whether a dump may continue on a memory mapped RX ring: the frame
+ * at the head and the frame roughly half a ring ahead of it must both be
+ * NL_MMAP_STATUS_UNUSED.
+ */
+static bool netlink_dump_space(struct netlink_sock *nlk)
+{
+	struct netlink_ring *ring = &nlk->rx_ring;
+	struct nl_mmap_hdr *hdr;
+	unsigned int n;
+
+	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+	if (hdr == NULL)
+		return false;
+
+	/* Valid positions are 0..frame_max, i.e. frame_max + 1 frames, so
+	 * wrapping has to subtract the frame count, not the last index.
+	 */
+	n = ring->head + ring->frame_max / 2;
+	if (n > ring->frame_max)
+		n -= ring->frame_max + 1;
+
+	hdr = __netlink_lookup_frame(ring, n);
+
+	return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
+}
+
+/* poll() handler.  For sockets with an RX ring it first drives any pending
+ * dump, then combines the datagram_poll() mask with the ring state:
+ * readable when the frame before the RX head is not UNUSED, writable when
+ * the current TX frame is UNUSED.
+ */
+static unsigned int netlink_poll(struct file *file, struct socket *sock,
+				 poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int mask;
+	int err;
+
+	if (nlk->rx_ring.pg_vec != NULL) {
+		/* Memory mapped sockets don't call recvmsg(), so flow control
+		 * for dumps is performed here. A dump is allowed to continue
+		 * if at least half the ring is unused.
+		 */
+		while (nlk->cb != NULL && netlink_dump_space(nlk)) {
+			err = netlink_dump(sk);
+			if (err < 0) {
+				/* sk_err holds a positive errno value, the
+				 * dump result is a negative one */
+				sk->sk_err = -err;
+				sk->sk_error_report(sk);
+				break;
+			}
+		}
+		netlink_rcv_wake(sk);
+	}
+
+	mask = datagram_poll(file, sock, wait);
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	if (nlk->rx_ring.pg_vec) {
+		netlink_forward_ring(&nlk->rx_ring);
+		if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
+			mask |= POLLIN | POLLRDNORM;
+	}
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	spin_lock_bh(&sk->sk_write_queue.lock);
+	if (nlk->tx_ring.pg_vec) {
+		if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
+			mask |= POLLOUT | POLLWRNORM;
+	}
+	spin_unlock_bh(&sk->sk_write_queue.lock);
+
+	return mask;
+}
+
+static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
+{
+ return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
+}
+
+static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
+ struct netlink_ring *ring,
+ struct nl_mmap_hdr *hdr)
+{
+ unsigned int size;
+ void *data;
+
+ size = ring->frame_size - NL_MMAP_HDRLEN;
+ data = (void *)hdr + NL_MMAP_HDRLEN;
+
+ skb->head = data;
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
+ skb->len = 0;
+
+ skb->destructor = netlink_skb_destructor;
+ NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
+ NETLINK_CB(skb).sk = sk;
+}
+
+/* Send the frames marked NL_MMAP_STATUS_VALID in the TX ring, starting at
+ * the ring head.  A frame is handed over without copying only for unicast
+ * to the kernel (dst_portid and dst_group both zero) on an exclusively owned
+ * socket/ring; otherwise its payload is copied into a regular skb and the
+ * frame is released immediately.
+ *
+ * Returns the sum of the netlink_unicast() results if anything was sent,
+ * otherwise zero or a negative errno.
+ */
+static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
+				u32 dst_portid, u32 dst_group,
+				struct sock_iocb *siocb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	struct nl_mmap_hdr *hdr;
+	struct sk_buff *skb;
+	unsigned int maxlen, nm_len;
+	bool excl = true;
+	int err = 0, len = 0;
+
+	/* Netlink messages are validated by the receiver before processing.
+	 * In order to avoid userspace changing the contents of the message
+	 * after validation, the socket and the ring may only be used by a
+	 * single process, otherwise we fall back to copying.
+	 */
+	if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
+	    atomic_read(&nlk->mapped) > 1)
+		excl = false;
+
+	mutex_lock(&nlk->pg_vec_lock);
+
+	ring   = &nlk->tx_ring;
+	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+
+	do {
+		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
+		if (hdr == NULL) {
+			if (!(msg->msg_flags & MSG_DONTWAIT) &&
+			    atomic_read(&nlk->tx_ring.pending))
+				schedule();
+			continue;
+		}
+
+		/* The frame header lives in pages that are also mapped into
+		 * userspace, so nm_len may change at any time.  Fetch it
+		 * exactly once and use that snapshot for the bounds check as
+		 * well as for every length derived from it below.
+		 */
+		nm_len = ACCESS_ONCE(hdr->nm_len);
+		if (nm_len > maxlen) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		netlink_frame_flush_dcache(hdr);
+
+		if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
+			skb = alloc_skb_head(GFP_KERNEL);
+			if (skb == NULL) {
+				err = -ENOBUFS;
+				goto out;
+			}
+			sock_hold(sk);
+			netlink_ring_setup_skb(skb, sk, ring, hdr);
+			NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
+			__skb_put(skb, nm_len);
+			netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+			atomic_inc(&ring->pending);
+		} else {
+			skb = alloc_skb(nm_len, GFP_KERNEL);
+			if (skb == NULL) {
+				err = -ENOBUFS;
+				goto out;
+			}
+			__skb_put(skb, nm_len);
+			memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+		}
+
+		netlink_increment_head(ring);
+
+		NETLINK_CB(skb).portid	  = nlk->portid;
+		NETLINK_CB(skb).dst_group = dst_group;
+		NETLINK_CB(skb).creds	  = siocb->scm->creds;
+
+		err = security_netlink_send(sk, skb);
+		if (err) {
+			kfree_skb(skb);
+			goto out;
+		}
+
+		if (unlikely(dst_group)) {
+			/* extra reference: the skb is still needed for the
+			 * unicast below */
+			atomic_inc(&skb->users);
+			netlink_broadcast(sk, skb, dst_portid, dst_group,
+					  GFP_KERNEL);
+		}
+		err = netlink_unicast(sk, skb, dst_portid,
+				      msg->msg_flags & MSG_DONTWAIT);
+		if (err < 0)
+			goto out;
+		len += err;
+
+	} while (hdr != NULL ||
+		 (!(msg->msg_flags & MSG_DONTWAIT) &&
+		  atomic_read(&nlk->tx_ring.pending)));
+
+	if (len > 0)
+		err = len;
+out:
+	mutex_unlock(&nlk->pg_vec_lock);
+	return err;
+}
+
+static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct nl_mmap_hdr *hdr;
+
+ hdr = netlink_mmap_hdr(skb);
+ hdr->nm_len = skb->len;
+ hdr->nm_group = NETLINK_CB(skb).dst_group;
+ hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+ hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+ hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+ netlink_frame_flush_dcache(hdr);
+ netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+
+ NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
+ kfree_skb(skb);
+}
+
+static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct netlink_ring *ring = &nlk->rx_ring;
+ struct nl_mmap_hdr *hdr;
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+ if (hdr == NULL) {
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ kfree_skb(skb);
+ netlink_overrun(sk);
+ return;
+ }
+ netlink_increment_head(ring);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+ hdr->nm_len = skb->len;
+ hdr->nm_group = NETLINK_CB(skb).dst_group;
+ hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+ hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
+ hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
+ netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
+}
+
+#else /* CONFIG_NETLINK_MMAP */
+#define netlink_skb_is_mmaped(skb) false
+#define netlink_rx_is_mmaped(sk) false
+#define netlink_tx_is_mmaped(sk) false
+#define netlink_mmap sock_no_mmap
+#define netlink_poll datagram_poll
+#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
+#endif /* CONFIG_NETLINK_MMAP */
+
static void netlink_destroy_callback(struct netlink_callback *cb)
{
kfree_skb(cb->skb);
@@ -164,6 +716,53 @@ static void netlink_consume_callback(struct netlink_callback *cb)
kfree(cb);
}
+/*
+ * skb destructor installed by netlink_skb_set_owner_r().
+ * For mmaped skbs it settles the ring frame status, decrements the ring's
+ * pending counter and drops the socket reference taken for the frame.
+ * For any skb that has an owning socket it then calls sock_rfree().
+ */
+static void netlink_skb_destructor(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETLINK_MMAP
+ struct nl_mmap_hdr *hdr;
+ struct netlink_ring *ring;
+ struct sock *sk;
+
+ /* If a packet from the kernel to userspace was freed because of an
+ * error without being delivered to userspace, the kernel must reset
+ * the status. In the direction userspace to kernel, the status is
+ * always reset here after the packet was processed and freed.
+ */
+ if (netlink_skb_is_mmaped(skb)) {
+ hdr = netlink_mmap_hdr(skb);
+ sk = NETLINK_CB(skb).sk;
+
+ if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
+ netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+ ring = &nlk_sk(sk)->tx_ring;
+ } else {
+ /* undelivered RX frame: publish it as VALID with zero length */
+ if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
+ hdr->nm_len = 0;
+ netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+ }
+ ring = &nlk_sk(sk)->rx_ring;
+ }
+
+ WARN_ON(atomic_read(&ring->pending) == 0);
+ atomic_dec(&ring->pending);
+ sock_put(sk);
+
+ /* NOTE(review): presumably keeps the generic skb free path away from the ring frame - confirm */
+ skb->head = NULL;
+ }
+#endif
+ if (skb->sk != NULL)
+ sock_rfree(skb);
+}
+
+/*
+ * Attach @skb to receiving socket @sk: set skb->sk, install
+ * netlink_skb_destructor() and charge skb->truesize to the socket's
+ * receive memory accounting. Warns if the skb already has an owner.
+ */
+static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+ WARN_ON(skb->sk != NULL);
+ skb->sk = sk;
+ skb->destructor = netlink_skb_destructor;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ sk_mem_charge(sk, skb->truesize);
+}
+
static void netlink_sock_destruct(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -177,6 +776,18 @@ static void netlink_sock_destruct(struct sock *sk)
}
skb_queue_purge(&sk->sk_receive_queue);
+#ifdef CONFIG_NETLINK_MMAP
+ if (1) {
+ struct nl_mmap_req req;
+
+ memset(&req, 0, sizeof(req));
+ if (nlk->rx_ring.pg_vec)
+ netlink_set_ring(sk, &req, true, false);
+ memset(&req, 0, sizeof(req));
+ if (nlk->tx_ring.pg_vec)
+ netlink_set_ring(sk, &req, true, true);
+ }
+#endif /* CONFIG_NETLINK_MMAP */
if (!sock_flag(sk, SOCK_DEAD)) {
printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -440,6 +1051,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
mutex_init(nlk->cb_mutex);
}
init_waitqueue_head(&nlk->wait);
+#ifdef CONFIG_NETLINK_MMAP
+ mutex_init(&nlk->pg_vec_lock);
+#endif
sk->sk_destruct = netlink_sock_destruct;
sk->sk_protocol = protocol;
@@ -771,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
return 0;
}
-static void netlink_overrun(struct sock *sk)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
- if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
- sk->sk_err = ENOBUFS;
- sk->sk_error_report(sk);
- }
- }
- atomic_inc(&sk->sk_drops);
-}
-
static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
struct sock *sock;
@@ -836,8 +1437,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
nlk = nlk_sk(sk);
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(0, &nlk->state)) {
+ if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+ test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ !netlink_skb_is_mmaped(skb)) {
DECLARE_WAITQUEUE(wait, current);
if (!*timeo) {
if (!ssk || netlink_is_kernel(ssk))
@@ -851,7 +1453,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
add_wait_queue(&nlk->wait, &wait);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(0, &nlk->state)) &&
+ test_bit(NETLINK_CONGESTED, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
*timeo = schedule_timeout(*timeo);
@@ -865,7 +1467,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
}
return 1;
}
- skb_set_owner_r(skb, sk);
+ netlink_skb_set_owner_r(skb, sk);
return 0;
}
@@ -873,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
int len = skb->len;
- skb_queue_tail(&sk->sk_receive_queue, skb);
+#ifdef CONFIG_NETLINK_MMAP
+ if (netlink_skb_is_mmaped(skb))
+ netlink_queue_mmaped_skb(sk, skb);
+ else if (netlink_rx_is_mmaped(sk))
+ netlink_ring_set_copied(sk, skb);
+ else
+#endif /* CONFIG_NETLINK_MMAP */
+ skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, len);
return len;
}
@@ -896,7 +1505,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
int delta;
- skb_orphan(skb);
+ WARN_ON(skb->sk != NULL);
+ if (netlink_skb_is_mmaped(skb))
+ return skb;
delta = skb->end - skb->tail;
if (delta * 2 < skb->truesize)
@@ -916,16 +1527,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
return skb;
}
-static void netlink_rcv_wake(struct sock *sk)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (skb_queue_empty(&sk->sk_receive_queue))
- clear_bit(0, &nlk->state);
- if (!test_bit(0, &nlk->state))
- wake_up_interruptible(&nlk->wait);
-}
-
static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
struct sock *ssk)
{
@@ -935,8 +1536,8 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
- skb_set_owner_r(skb, sk);
- NETLINK_CB(skb).ssk = ssk;
+ netlink_skb_set_owner_r(skb, sk);
+ NETLINK_CB(skb).sk = ssk;
nlk->netlink_rcv(skb);
consume_skb(skb);
} else {
@@ -982,6 +1583,69 @@ retry:
}
EXPORT_SYMBOL(netlink_unicast);
+/*
+ * Allocate an skb for a message of @size bytes destined for @dst_portid.
+ *
+ * With CONFIG_NETLINK_MMAP, if the destination socket has an RX ring whose
+ * frames can hold @size bytes, an skb backed by the next unused ring frame
+ * is returned. If the destination cannot be looked up, has no RX ring, or
+ * the frames are too small, a regular alloc_skb(@size, @gfp_mask) skb is
+ * returned instead.
+ *
+ * Returns NULL when the skb head cannot be allocated or when the ring has
+ * no unused frame; the latter is also reported through netlink_overrun().
+ */
+struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
+ u32 dst_portid, gfp_t gfp_mask)
+{
+#ifdef CONFIG_NETLINK_MMAP
+ struct sock *sk = NULL;
+ struct sk_buff *skb;
+ struct netlink_ring *ring;
+ struct nl_mmap_hdr *hdr;
+ unsigned int maxlen;
+
+ sk = netlink_getsockbyportid(ssk, dst_portid);
+ if (IS_ERR(sk))
+ goto out;
+
+ ring = &nlk_sk(sk)->rx_ring;
+ /* fast-path without atomic ops for common case: non-mmaped receiver */
+ if (ring->pg_vec == NULL)
+ goto out_put;
+
+ skb = alloc_skb_head(gfp_mask);
+ if (skb == NULL)
+ goto err1;
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ /* check again under lock */
+ if (ring->pg_vec == NULL)
+ goto out_free;
+
+ /* payload capacity of one frame */
+ maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+ if (maxlen < size)
+ goto out_free;
+
+ netlink_forward_ring(ring);
+ hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+ if (hdr == NULL)
+ goto err2;
+ netlink_ring_setup_skb(skb, sk, ring, hdr);
+ netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+ /* balanced by the atomic_dec() in netlink_skb_destructor() */
+ atomic_inc(&ring->pending);
+ netlink_increment_head(ring);
+
+ /* the socket reference is kept here; netlink_skb_destructor() does the sock_put() */
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ return skb;
+
+ /* failure paths: no fallback allocation, the caller gets NULL */
+err2:
+ kfree_skb(skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ netlink_overrun(sk);
+err1:
+ sock_put(sk);
+ return NULL;
+
+ /* fallback paths: release what was taken and use a regular skb */
+out_free:
+ kfree_skb(skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+out_put:
+ sock_put(sk);
+out:
+#endif
+ return alloc_skb(size, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
int res = 0;
@@ -1006,8 +1670,8 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
struct netlink_sock *nlk = nlk_sk(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
- !test_bit(0, &nlk->state)) {
- skb_set_owner_r(skb, sk);
+ !test_bit(NETLINK_CONGESTED, &nlk->state)) {
+ netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
}
@@ -1242,7 +1906,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
- if (optlen >= sizeof(int) &&
+ if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
+ optlen >= sizeof(int) &&
get_user(val, (unsigned int __user *)optval))
return -EFAULT;
@@ -1284,13 +1949,32 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
case NETLINK_NO_ENOBUFS:
if (val) {
nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
- clear_bit(0, &nlk->state);
+ clear_bit(NETLINK_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
} else {
nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
}
err = 0;
break;
+#ifdef CONFIG_NETLINK_MMAP
+ case NETLINK_RX_RING:
+ case NETLINK_TX_RING: {
+ struct nl_mmap_req req;
+
+ /* Rings might consume more memory than queue limits, require
+ * CAP_NET_ADMIN.
+ */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (optlen < sizeof(req))
+ return -EINVAL;
+ if (copy_from_user(&req, optval, sizeof(req)))
+ return -EFAULT;
+ err = netlink_set_ring(sk, &req, false,
+ optname == NETLINK_TX_RING);
+ break;
+ }
+#endif /* CONFIG_NETLINK_MMAP */
default:
err = -ENOPROTOOPT;
}
@@ -1401,6 +2085,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out;
}
+ if (netlink_tx_is_mmaped(sk) &&
+ msg->msg_iov->iov_base == NULL) {
+ err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
+ siocb);
+ goto out;
+ }
+
err = -EMSGSIZE;
if (len > sk->sk_sndbuf - 32)
goto out;
@@ -1695,7 +2386,7 @@ struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
{
struct nlmsghdr *nlh;
- int size = NLMSG_LENGTH(len);
+ int size = nlmsg_msg_size(len);
nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
nlh->nlmsg_type = type;
@@ -1704,7 +2395,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
nlh->nlmsg_pid = portid;
nlh->nlmsg_seq = seq;
if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
- memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
+ memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
return nlh;
}
EXPORT_SYMBOL(__nlmsg_put);
@@ -1733,9 +2424,13 @@ static int netlink_dump(struct sock *sk)
alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
- skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
+ if (!netlink_rx_is_mmaped(sk) &&
+ atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ goto errout_skb;
+ skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
if (!skb)
goto errout_skb;
+ netlink_skb_set_owner_r(skb, sk);
len = cb->dump(skb, cb);
@@ -1790,13 +2485,25 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
if (cb == NULL)
return -ENOBUFS;
+ /* Memory mapped dump requests need to be copied to avoid looping
+ * on the pending state in netlink_mmap_sendmsg() while the CB holds
+ * a reference to the skb.
+ */
+ if (netlink_skb_is_mmaped(skb)) {
+ skb = skb_copy(skb, GFP_KERNEL);
+ if (skb == NULL) {
+ kfree(cb);
+ return -ENOBUFS;
+ }
+ } else
+ atomic_inc(&skb->users);
+
cb->dump = control->dump;
cb->done = control->done;
cb->nlh = nlh;
cb->data = control->data;
cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
- atomic_inc(&skb->users);
cb->skb = skb;
sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
@@ -1850,7 +2557,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
if (err)
payload += nlmsg_len(nlh);
- skb = nlmsg_new(payload, GFP_KERNEL);
+ skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
+ NETLINK_CB(in_skb).portid, GFP_KERNEL);
if (!skb) {
struct sock *sk;
@@ -2116,7 +2824,7 @@ static const struct proto_ops netlink_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
- .poll = datagram_poll,
+ .poll = netlink_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -2124,7 +2832,7 @@ static const struct proto_ops netlink_ops = {
.getsockopt = netlink_getsockopt,
.sendmsg = netlink_sendmsg,
.recvmsg = netlink_recvmsg,
- .mmap = sock_no_mmap,
+ .mmap = netlink_mmap,
.sendpage = sock_no_sendpage,
};
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
new file mode 100644
index 000000000000..ed8522265f4e
--- /dev/null
+++ b/net/netlink/af_netlink.h
@@ -0,0 +1,82 @@
+#ifndef _AF_NETLINK_H
+#define _AF_NETLINK_H
+
+#include <net/sock.h>
+
+/* NLGRPSZ(x): bytes needed for an x-bit bitmap, rounded up to whole unsigned longs */
+#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
+/* NLGRPLONGS(x): the same size expressed as a number of unsigned longs */
+#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
+
+/* State of one memory-mapped ring; struct netlink_sock embeds one for RX and one for TX. */
+struct netlink_ring {
+ /* NULL while no ring is configured for this direction */
+ void **pg_vec;
+ /* NOTE(review): presumably the current frame index, advanced by netlink_increment_head() - confirm */
+ unsigned int head;
+ unsigned int frames_per_block;
+ /* size of one frame; the payload capacity is frame_size - NL_MMAP_HDRLEN */
+ unsigned int frame_size;
+ /* sock_diag reports frame_max + 1 as the number of frames */
+ unsigned int frame_max;
+
+ unsigned int pg_vec_order;
+ /* sock_diag reports pg_vec_pages << PAGE_SHIFT as the block size */
+ unsigned int pg_vec_pages;
+ /* sock_diag reports this as the number of blocks */
+ unsigned int pg_vec_len;
+
+ /* incremented when a frame is reserved for an skb, decremented in the skb destructor */
+ atomic_t pending;
+};
+
+/* Netlink socket: a struct sock followed by netlink-private state. */
+struct netlink_sock {
+ /* struct sock has to be the first member of netlink_sock */
+ struct sock sk;
+ u32 portid;
+ u32 dst_portid;
+ u32 dst_group;
+ /* option flags such as NETLINK_RECV_NO_ENOBUFS */
+ u32 flags;
+ u32 subscriptions;
+ /* number of bits in the groups bitmap (sock_diag dumps NLGRPSZ(ngroups) bytes) */
+ u32 ngroups;
+ /* group bitmap; may be NULL */
+ unsigned long *groups;
+ /* bit field accessed with test_bit()/clear_bit(), e.g. NETLINK_CONGESTED */
+ unsigned long state;
+ wait_queue_head_t wait;
+ struct netlink_callback *cb;
+ struct mutex *cb_mutex;
+ struct mutex cb_def_mutex;
+ void (*netlink_rcv)(struct sk_buff *skb);
+ void (*netlink_bind)(int group);
+ struct module *module;
+#ifdef CONFIG_NETLINK_MMAP
+ /* held while the ring configuration is read for sock_diag and around the mmaped send path */
+ struct mutex pg_vec_lock;
+ struct netlink_ring rx_ring;
+ struct netlink_ring tx_ring;
+ atomic_t mapped;
+#endif /* CONFIG_NETLINK_MMAP */
+};
+
+/* Return the struct netlink_sock that embeds @sk as its 'sk' member. */
+static inline struct netlink_sock *nlk_sk(struct sock *sk)
+{
+ return container_of(sk, struct netlink_sock, sk);
+}
+
+/* Hash table of netlink sockets; embedded as 'hash' in struct netlink_table. */
+struct nl_portid_hash {
+ /* bucket array; valid bucket indices are 0..mask inclusive */
+ struct hlist_head *table;
+ unsigned long rehash_time;
+
+ unsigned int mask;
+ unsigned int shift;
+
+ unsigned int entries;
+ unsigned int max_shift;
+
+ u32 rnd;
+};
+
+/* One entry of nl_table[], which is indexed by netlink protocol number. */
+struct netlink_table {
+ struct nl_portid_hash hash;
+ /* list of bound sockets, walked with sk_for_each_bound() */
+ struct hlist_head mc_list;
+ struct listeners __rcu *listeners;
+ unsigned int flags;
+ unsigned int groups;
+ struct mutex *cb_mutex;
+ struct module *module;
+ void (*bind)(int group);
+ int registered;
+};
+
+extern struct netlink_table *nl_table;
+extern rwlock_t nl_table_lock;
+
+#endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
new file mode 100644
index 000000000000..1af29624b92f
--- /dev/null
+++ b/net/netlink/diag.c
@@ -0,0 +1,227 @@
+#include <linux/module.h>
+
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <linux/netlink_diag.h>
+
+#include "af_netlink.h"
+
+#ifdef CONFIG_NETLINK_MMAP
+/*
+ * Emit the geometry of @ring as a struct netlink_diag_ring attribute of
+ * type @nl_type. Returns the result of nla_put().
+ */
+static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
+			    struct sk_buff *nlskb)
+{
+	struct netlink_diag_ring cfg = {
+		.ndr_block_size	= ring->pg_vec_pages << PAGE_SHIFT,
+		.ndr_block_nr	= ring->pg_vec_len,
+		.ndr_frame_size	= ring->frame_size,
+		/* frame_max is the highest frame index */
+		.ndr_frame_nr	= ring->frame_max + 1,
+	};
+
+	return nla_put(nlskb, nl_type, sizeof(cfg), &cfg);
+}
+
+/*
+ * Dump the RX and then the TX ring configuration of @sk into @nlskb while
+ * holding pg_vec_lock. Stops at the first failing attribute and returns
+ * its error; returns 0 when both attributes were added.
+ */
+static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	int err;
+
+	mutex_lock(&nlk->pg_vec_lock);
+
+	err = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
+	if (err)
+		goto unlock;
+
+	err = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, nlskb);
+
+unlock:
+	mutex_unlock(&nlk->pg_vec_lock);
+	return err;
+}
+#else
+/* Without CONFIG_NETLINK_MMAP there is no ring configuration to report. */
+static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
+{
+ return 0;
+}
+#endif
+
+/*
+ * Add the socket's group bitmap as a NETLINK_DIAG_GROUPS attribute.
+ * A socket without a bitmap adds nothing and reports success.
+ */
+static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (nlk->groups != NULL)
+		return nla_put(nlskb, NETLINK_DIAG_GROUPS,
+			       NLGRPSZ(nlk->ngroups), nlk->groups);
+
+	return 0;
+}
+
+/*
+ * Append one SOCK_DIAG_BY_FAMILY message describing @sk to @skb.
+ * The optional attributes selected by req->ndiag_show (groups, meminfo,
+ * ring configuration) are added after the fixed header.
+ * Returns the result of nlmsg_end() on success, or -EMSGSIZE when the
+ * message or one of its attributes cannot be added; a partially built
+ * message is cancelled.
+ */
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+			struct netlink_diag_req *req,
+			u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_diag_msg *msg;
+	struct nlmsghdr *hdr;
+
+	hdr = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
+			flags);
+	if (hdr == NULL)
+		return -EMSGSIZE;
+
+	msg = nlmsg_data(hdr);
+
+	/* generic socket identity */
+	msg->ndiag_family = AF_NETLINK;
+	msg->ndiag_type = sk->sk_type;
+	msg->ndiag_protocol = sk->sk_protocol;
+	msg->ndiag_state = sk->sk_state;
+
+	/* netlink addressing */
+	msg->ndiag_ino = sk_ino;
+	msg->ndiag_portid = nlk->portid;
+	msg->ndiag_dst_portid = nlk->dst_portid;
+	msg->ndiag_dst_group = nlk->dst_group;
+	sock_diag_save_cookie(sk, msg->ndiag_cookie);
+
+	if (req->ndiag_show & NDIAG_SHOW_GROUPS) {
+		if (sk_diag_dump_groups(sk, skb))
+			goto cancel;
+	}
+
+	if (req->ndiag_show & NDIAG_SHOW_MEMINFO) {
+		if (sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
+			goto cancel;
+	}
+
+	if (req->ndiag_show & NDIAG_SHOW_RING_CFG) {
+		if (sk_diag_put_rings_cfg(sk, skb))
+			goto cancel;
+	}
+
+	return nlmsg_end(skb, hdr);
+
+cancel:
+	nlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Dump the sockets of one netlink protocol that belong to the requester's
+ * network namespace into @skb.
+ *
+ * Walks every hash bucket of nl_table[@protocol] and then the mc_list
+ * entries that are not hashed. The first @s_num matching sockets are
+ * skipped so that an interrupted dump can resume. The resume cursor is
+ * stored in cb->args[0] (sockets seen so far) and cb->args[1] (protocol).
+ *
+ * Returns 1 when sk_diag_fill() failed (e.g. @skb is full), 0 otherwise.
+ */
+static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ int protocol, int s_num)
+{
+ struct netlink_table *tbl = &nl_table[protocol];
+ struct nl_portid_hash *hash = &tbl->hash;
+ struct net *net = sock_net(skb->sk);
+ struct netlink_diag_req *req;
+ struct sock *sk;
+ int ret = 0, num = 0, i;
+
+ req = nlmsg_data(cb->nlh);
+
+ /* pass 1: hashed sockets */
+ for (i = 0; i <= hash->mask; i++) {
+ sk_for_each(sk, &hash->table[i]) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ /* already reported by an earlier call */
+ if (num < s_num) {
+ num++;
+ continue;
+ }
+
+ if (sk_diag_fill(sk, skb, req,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ sock_i_ino(sk)) < 0) {
+ ret = 1;
+ goto done;
+ }
+
+ num++;
+ }
+ }
+
+ /* pass 2: bound sockets that were not covered by the hash walk */
+ sk_for_each_bound(sk, &tbl->mc_list) {
+ if (sk_hashed(sk))
+ continue;
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num < s_num) {
+ num++;
+ continue;
+ }
+
+ if (sk_diag_fill(sk, skb, req,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ sock_i_ino(sk)) < 0) {
+ ret = 1;
+ goto done;
+ }
+ num++;
+ }
+done:
+ /* save the cursor for the next invocation */
+ cb->args[0] = num;
+ cb->args[1] = protocol;
+
+ return ret;
+}
+
+/*
+ * Dump callback: report the sockets of the requested protocol, or of all
+ * protocols starting at the saved cursor when NDIAG_PROTO_ALL is requested.
+ * Returns -ENOENT for an out-of-range protocol, otherwise skb->len.
+ */
+static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct netlink_diag_req *req = nlmsg_data(cb->nlh);
+	int skip = cb->args[0];
+	int proto;
+
+	if (req->sdiag_protocol != NDIAG_PROTO_ALL &&
+	    req->sdiag_protocol >= MAX_LINKS)
+		return -ENOENT;
+
+	read_lock(&nl_table_lock);
+
+	if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
+		for (proto = cb->args[1]; proto < MAX_LINKS; proto++) {
+			if (__netlink_diag_dump(skb, cb, proto, skip))
+				break;
+			/* the skip count only applies to the resumed protocol */
+			skip = 0;
+		}
+	} else {
+		__netlink_diag_dump(skb, cb, req->sdiag_protocol, skip);
+	}
+
+	read_unlock(&nl_table_lock);
+
+	return skb->len;
+}
+
+/*
+ * sock_diag entry point for AF_NETLINK requests.
+ * Returns -EINVAL when the payload is shorter than struct netlink_diag_req
+ * and -EOPNOTSUPP for anything that is not a dump request; dump requests
+ * are handed to netlink_dump_start().
+ */
+static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	struct net *net = sock_net(skb->sk);
+	struct netlink_dump_control ctl = {
+		.dump = netlink_diag_dump,
+	};
+	int min_len = sizeof(struct netlink_diag_req);
+
+	if (nlmsg_len(h) < min_len)
+		return -EINVAL;
+
+	if (!(h->nlmsg_flags & NLM_F_DUMP))
+		return -EOPNOTSUPP;
+
+	return netlink_dump_start(net->diag_nlsk, skb, h, &ctl);
+}
+
+/* sock_diag handler for AF_NETLINK; registered and unregistered by the module init/exit functions. */
+static const struct sock_diag_handler netlink_diag_handler = {
+ .family = AF_NETLINK,
+ .dump = netlink_diag_handler_dump,
+};
+
+/* Module init: register the AF_NETLINK sock_diag handler; returns sock_diag_register()'s result. */
+static int __init netlink_diag_init(void)
+{
+ return sock_diag_register(&netlink_diag_handler);
+}
+
+/* Module exit: unregister the AF_NETLINK sock_diag handler. */
+static void __exit netlink_diag_exit(void)
+{
+ sock_diag_unregister(&netlink_diag_handler);
+}
+
+module_init(netlink_diag_init);
+module_exit(netlink_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 5a55be3f17a5..2fd6dbea327a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -16,10 +16,12 @@
#include <linux/skbuff.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
+#include <linux/rwsem.h>
#include <net/sock.h>
#include <net/genetlink.h>
static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
+static DECLARE_RWSEM(cb_lock);
void genl_lock(void)
{
@@ -41,6 +43,18 @@ int lockdep_genl_is_held(void)
EXPORT_SYMBOL(lockdep_genl_is_held);
#endif
+/*
+ * Take cb_lock for writing and then the genl mutex. genl_rcv() holds
+ * cb_lock for reading while messages are processed, so this also excludes
+ * handlers of parallel_ops families, which run without genl_lock().
+ */
+static void genl_lock_all(void)
+{
+ down_write(&cb_lock);
+ genl_lock();
+}
+
+/* Counterpart of genl_lock_all(): release the locks in reverse order. */
+static void genl_unlock_all(void)
+{
+ genl_unlock();
+ up_write(&cb_lock);
+}
+
#define GENL_FAM_TAB_SIZE 16
#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
@@ -144,7 +158,7 @@ int genl_register_mc_group(struct genl_family *family,
BUG_ON(grp->name[0] == '\0');
BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL);
- genl_lock();
+ genl_lock_all();
/* special-case our own group */
if (grp == &notify_grp)
@@ -213,7 +227,7 @@ int genl_register_mc_group(struct genl_family *family,
genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp);
out:
- genl_unlock();
+ genl_unlock_all();
return err;
}
EXPORT_SYMBOL(genl_register_mc_group);
@@ -255,9 +269,9 @@ static void __genl_unregister_mc_group(struct genl_family *family,
void genl_unregister_mc_group(struct genl_family *family,
struct genl_multicast_group *grp)
{
- genl_lock();
+ genl_lock_all();
__genl_unregister_mc_group(family, grp);
- genl_unlock();
+ genl_unlock_all();
}
EXPORT_SYMBOL(genl_unregister_mc_group);
@@ -303,9 +317,9 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops)
if (ops->policy)
ops->flags |= GENL_CMD_CAP_HASPOL;
- genl_lock();
+ genl_lock_all();
list_add_tail(&ops->ops_list, &family->ops_list);
- genl_unlock();
+ genl_unlock_all();
genl_ctrl_event(CTRL_CMD_NEWOPS, ops);
err = 0;
@@ -334,16 +348,16 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
{
struct genl_ops *rc;
- genl_lock();
+ genl_lock_all();
list_for_each_entry(rc, &family->ops_list, ops_list) {
if (rc == ops) {
list_del(&ops->ops_list);
- genl_unlock();
+ genl_unlock_all();
genl_ctrl_event(CTRL_CMD_DELOPS, ops);
return 0;
}
}
- genl_unlock();
+ genl_unlock_all();
return -ENOENT;
}
@@ -373,7 +387,7 @@ int genl_register_family(struct genl_family *family)
INIT_LIST_HEAD(&family->ops_list);
INIT_LIST_HEAD(&family->mcast_groups);
- genl_lock();
+ genl_lock_all();
if (genl_family_find_byname(family->name)) {
err = -EEXIST;
@@ -394,7 +408,7 @@ int genl_register_family(struct genl_family *family)
goto errout_locked;
}
- if (family->maxattr) {
+ if (family->maxattr && !family->parallel_ops) {
family->attrbuf = kmalloc((family->maxattr+1) *
sizeof(struct nlattr *), GFP_KERNEL);
if (family->attrbuf == NULL) {
@@ -405,14 +419,14 @@ int genl_register_family(struct genl_family *family)
family->attrbuf = NULL;
list_add_tail(&family->family_list, genl_family_chain(family->id));
- genl_unlock();
+ genl_unlock_all();
genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);
return 0;
errout_locked:
- genl_unlock();
+ genl_unlock_all();
errout:
return err;
}
@@ -476,7 +490,7 @@ int genl_unregister_family(struct genl_family *family)
{
struct genl_family *rc;
- genl_lock();
+ genl_lock_all();
genl_unregister_mc_groups(family);
@@ -486,14 +500,14 @@ int genl_unregister_family(struct genl_family *family)
list_del(&rc->family_list);
INIT_LIST_HEAD(&family->ops_list);
- genl_unlock();
+ genl_unlock_all();
kfree(family->attrbuf);
genl_ctrl_event(CTRL_CMD_DELFAMILY, family);
return 0;
}
- genl_unlock();
+ genl_unlock_all();
return -ENOENT;
}
@@ -530,19 +544,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
}
EXPORT_SYMBOL(genlmsg_put);
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int genl_family_rcv_msg(struct genl_family *family,
+ struct sk_buff *skb,
+ struct nlmsghdr *nlh)
{
struct genl_ops *ops;
- struct genl_family *family;
struct net *net = sock_net(skb->sk);
struct genl_info info;
struct genlmsghdr *hdr = nlmsg_data(nlh);
+ struct nlattr **attrbuf;
int hdrlen, err;
- family = genl_family_find_byid(nlh->nlmsg_type);
- if (family == NULL)
- return -ENOENT;
-
/* this family doesn't exist in this netns */
if (!family->netnsok && !net_eq(net, &init_net))
return -ENOENT;
@@ -560,29 +572,33 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EPERM;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ops->dumpit,
+ .done = ops->done,
+ };
+
if (ops->dumpit == NULL)
return -EOPNOTSUPP;
- genl_unlock();
- {
- struct netlink_dump_control c = {
- .dump = ops->dumpit,
- .done = ops->done,
- };
- err = netlink_dump_start(net->genl_sock, skb, nlh, &c);
- }
- genl_lock();
- return err;
+ return netlink_dump_start(net->genl_sock, skb, nlh, &c);
}
if (ops->doit == NULL)
return -EOPNOTSUPP;
- if (family->attrbuf) {
- err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
+ if (family->maxattr && family->parallel_ops) {
+ attrbuf = kmalloc((family->maxattr+1) *
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (attrbuf == NULL)
+ return -ENOMEM;
+ } else
+ attrbuf = family->attrbuf;
+
+ if (attrbuf) {
+ err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
ops->policy);
if (err < 0)
- return err;
+ goto out;
}
info.snd_seq = nlh->nlmsg_seq;
@@ -590,14 +606,14 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
info.nlhdr = nlh;
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
- info.attrs = family->attrbuf;
+ info.attrs = attrbuf;
genl_info_net_set(&info, net);
memset(&info.user_ptr, 0, sizeof(info.user_ptr));
if (family->pre_doit) {
err = family->pre_doit(ops, skb, &info);
if (err)
- return err;
+ goto out;
}
err = ops->doit(skb, &info);
@@ -605,14 +621,38 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (family->post_doit)
family->post_doit(ops, skb, &info);
+out:
+ if (family->parallel_ops)
+ kfree(attrbuf);
+
+ return err;
+}
+
+/*
+ * Per-message receive hook: look up the family by message type and pass
+ * the message to genl_family_rcv_msg(). Families that do not set
+ * parallel_ops are handled under the genl mutex.
+ * Returns -ENOENT for an unknown family, else the handler's result.
+ */
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct genl_family *family = genl_family_find_byid(nlh->nlmsg_type);
+	int err;
+
+	if (!family)
+		return -ENOENT;
+
+	/* parallel families do their own locking */
+	if (family->parallel_ops)
+		return genl_family_rcv_msg(family, skb, nlh);
+
+	genl_lock();
+	err = genl_family_rcv_msg(family, skb, nlh);
+	genl_unlock();
+
return err;
}
static void genl_rcv(struct sk_buff *skb)
{
- genl_lock();
+ down_read(&cb_lock);
netlink_rcv_skb(skb, &genl_rcv_msg);
- genl_unlock();
+ up_read(&cb_lock);
}
/**************************************************************************
@@ -918,7 +958,6 @@ static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
- .cb_mutex = &genl_mutex,
.flags = NL_CFG_F_NONROOT_RECV,
};
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 103bd704b5fc..ec0c80fde69f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -834,6 +834,8 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
+ memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
+
lock_sock(sk);
if (peer != 0) {
if (sk->sk_state != TCP_ESTABLISHED) {
diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig
index 60c3bbb63e8e..5948b2fc72f6 100644
--- a/net/nfc/Kconfig
+++ b/net/nfc/Kconfig
@@ -4,6 +4,7 @@
menuconfig NFC
depends on NET
+ depends on RFKILL || !RFKILL
tristate "NFC subsystem support"
default n
help
@@ -15,6 +16,5 @@ menuconfig NFC
source "net/nfc/nci/Kconfig"
source "net/nfc/hci/Kconfig"
-source "net/nfc/llcp/Kconfig"
source "drivers/nfc/Kconfig"
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index d1a117c2c401..a76f4533cb6c 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -6,5 +6,6 @@ obj-$(CONFIG_NFC) += nfc.o
obj-$(CONFIG_NFC_NCI) += nci/
obj-$(CONFIG_NFC_HCI) += hci/
-nfc-objs := core.o netlink.o af_nfc.o rawsock.o
-nfc-$(CONFIG_NFC_LLCP) += llcp/llcp.o llcp/commands.o llcp/sock.o
+nfc-objs := core.o netlink.o af_nfc.o rawsock.o llcp_core.o llcp_commands.o \
+ llcp_sock.o
+
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 6ceee8e181ca..40d2527693da 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -27,6 +27,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/rfkill.h>
#include <linux/nfc.h>
#include <net/genetlink.h>
@@ -58,6 +59,11 @@ int nfc_dev_up(struct nfc_dev *dev)
device_lock(&dev->dev);
+ if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
+ rc = -ERFKILL;
+ goto error;
+ }
+
if (!device_is_registered(&dev->dev)) {
rc = -ENODEV;
goto error;
@@ -117,6 +123,24 @@ error:
return rc;
}
+/*
+ * rfkill set_block callback for an NFC device (@data is the struct nfc_dev
+ * passed to rfkill_alloc()). Blocking brings the device down through
+ * nfc_dev_down(); unblocking does nothing here. Always returns 0; the
+ * result of nfc_dev_down() is not propagated.
+ */
+static int nfc_rfkill_set_block(void *data, bool blocked)
+{
+	struct nfc_dev *dev = data;
+
+	/* terminate the line so the message is not merged with the next one */
+	pr_debug("%s blocked %d\n", dev_name(&dev->dev), blocked);
+
+	if (blocked)
+		nfc_dev_down(dev);
+
+	return 0;
+}
+
+/* rfkill operations passed to rfkill_alloc() for every registered NFC device. */
+static const struct rfkill_ops nfc_rfkill_ops = {
+ .set_block = nfc_rfkill_set_block,
+};
+
/**
* nfc_start_poll - start polling for nfc targets
*
@@ -143,6 +167,11 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
goto error;
}
+ if (!dev->dev_up) {
+ rc = -ENODEV;
+ goto error;
+ }
+
if (dev->polling) {
rc = -EBUSY;
goto error;
@@ -835,6 +864,15 @@ int nfc_register_device(struct nfc_dev *dev)
pr_debug("The userspace won't be notified that the device %s was added\n",
dev_name(&dev->dev));
+ dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev,
+ RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev);
+ if (dev->rfkill) {
+ if (rfkill_register(dev->rfkill) < 0) {
+ rfkill_destroy(dev->rfkill);
+ dev->rfkill = NULL;
+ }
+ }
+
return 0;
}
EXPORT_SYMBOL(nfc_register_device);
@@ -852,6 +890,11 @@ void nfc_unregister_device(struct nfc_dev *dev)
id = dev->idx;
+ if (dev->rfkill) {
+ rfkill_unregister(dev->rfkill);
+ rfkill_destroy(dev->rfkill);
+ }
+
if (dev->ops->check_presence) {
device_lock(&dev->dev);
dev->shutting_down = true;
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp.h
index 0eae5c509504..ff8c434f7df8 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp.h
@@ -31,6 +31,7 @@ enum llcp_state {
#define LLCP_MAX_LTO 0xff
#define LLCP_MAX_RW 15
#define LLCP_MAX_MIUX 0x7ff
+#define LLCP_MAX_MIU (LLCP_MAX_MIUX + 128)
#define LLCP_WKS_NUM_SAP 16
#define LLCP_SDP_NUM_SAP 16
@@ -46,6 +47,19 @@ struct llcp_sock_list {
rwlock_t lock;
};
+/* One service discovery TLV (SDREQ or SDRES) together with its decoded fields. */
+struct nfc_llcp_sdp_tlv {
+ /* encoded TLV buffer, released with kfree() in nfc_llcp_free_sdp_tlv() */
+ u8 *tlv;
+ /* length of the encoded TLV in bytes */
+ u8 tlv_len;
+
+ /* for requests: points into tlv (offset 3), not separately allocated */
+ char *uri;
+ u8 tid;
+ u8 sap;
+
+ /* jiffies value recorded when a request TLV is built */
+ unsigned long time;
+
+ struct hlist_node node;
+};
+
struct nfc_llcp_local {
struct list_head list;
struct nfc_dev *dev;
@@ -86,6 +100,12 @@ struct nfc_llcp_local {
u8 remote_opt;
u16 remote_wks;
+ struct mutex sdreq_lock;
+ struct hlist_head pending_sdreqs;
+ struct timer_list sdreq_timer;
+ struct work_struct sdreq_timeout_work;
+ u8 sdreq_next_tid;
+
/* sockets array */
struct llcp_sock_list sockets;
struct llcp_sock_list connecting_sockets;
@@ -105,7 +125,12 @@ struct nfc_llcp_sock {
char *service_name;
size_t service_name_len;
u8 rw;
- u16 miu;
+ __be16 miux;
+
+
+ /* Remote link parameters */
+ u8 remote_rw;
+ u16 remote_miu;
/* Link variables */
u8 send_n;
@@ -138,6 +163,7 @@ struct nfc_llcp_ui_cb {
#define LLCP_HEADER_SIZE 2
#define LLCP_SEQUENCE_SIZE 1
+#define LLCP_AGF_PDU_HEADER_SIZE 2
/* LLCP versions: 1.1 is 1.0 plus SDP */
#define LLCP_VERSION_10 0x10
@@ -186,6 +212,7 @@ struct nfc_llcp_ui_cb {
void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *s);
void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *s);
+void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock);
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local);
int nfc_llcp_local_put(struct nfc_llcp_local *local);
@@ -213,12 +240,20 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
/* Commands API */
void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+ size_t uri_len);
+void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
+void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);
int nfc_llcp_send_symm(struct nfc_dev *dev);
int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);
int nfc_llcp_send_cc(struct nfc_llcp_sock *sock);
-int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap);
+int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
+ struct hlist_head *tlv_list, size_t tlvs_len);
+int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
+ struct hlist_head *tlv_list, size_t tlvs_len);
int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason);
int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock);
int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
diff --git a/net/nfc/llcp/Kconfig b/net/nfc/llcp/Kconfig
deleted file mode 100644
index a1a41cd68255..000000000000
--- a/net/nfc/llcp/Kconfig
+++ /dev/null
@@ -1,7 +0,0 @@
-config NFC_LLCP
- depends on NFC
- bool "NFC LLCP support"
- default n
- help
- Say Y here if you want to build support for a kernel NFC LLCP
- implementation. \ No newline at end of file
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp_commands.c
index c6bc3bd95052..c1b23eef83ca 100644
--- a/net/nfc/llcp/commands.c
+++ b/net/nfc/llcp_commands.c
@@ -26,7 +26,7 @@
#include <net/nfc/nfc.h>
-#include "../nfc.h"
+#include "nfc.h"
#include "llcp.h"
static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
@@ -117,6 +117,88 @@ u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
return tlv;
}
+/*
+ * Allocate an SDP entry wrapping an LLCP_TLV_SDRES TLV whose two byte value
+ * is the transaction id @tid followed by @sap.
+ *
+ * The encoded TLV comes from nfc_llcp_build_tlv() and is owned by the
+ * returned entry. Returns NULL if the entry cannot be allocated or if
+ * nfc_llcp_build_tlv() returns NULL; a successful result is released with
+ * nfc_llcp_free_sdp_tlv().
+ */
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
+{
+	u8 payload[2] = { tid, sap };
+	struct nfc_llcp_sdp_tlv *entry;
+
+	entry = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
+	if (!entry)
+		return NULL;
+
+	entry->tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, payload, 2,
+					&entry->tlv_len);
+	if (!entry->tlv)
+		goto free_entry;
+
+	/* Keep a decoded copy of the pair next to the wire format */
+	entry->tid = tid;
+	entry->sap = sap;
+
+	INIT_HLIST_NODE(&entry->node);
+
+	return entry;
+
+free_entry:
+	kfree(entry);
+	return NULL;
+}
+
+/*
+ * Allocate an SDP entry holding an LLCP_TLV_SDREQ TLV for @uri, tagged with
+ * the transaction id @tid.
+ *
+ * TLV layout: [0] LLCP_TLV_SDREQ, [1] value length, [2] tid, [3..] uri.
+ * A trailing NUL in @uri is not counted in the TLV length. The buffer is
+ * allocated zeroed and one byte larger than the TLV, so sdreq->uri (which
+ * points into the TLV buffer) is always NUL terminated.
+ *
+ * tlv_len and the TLV length byte are both 8 bits wide. A URI that does not
+ * fit is rejected: letting uri_len + 3 wrap would under-size the buffer and
+ * make the memcpy() below write past its end.
+ *
+ * Returns the new entry (release it with nfc_llcp_free_sdp_tlv()), or NULL
+ * on an unusable URI length or allocation failure.
+ */
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+						  size_t uri_len)
+{
+	struct nfc_llcp_sdp_tlv *sdreq;
+
+	/*
+	 * uri[uri_len - 1] is read below, so an empty URI is invalid, and
+	 * header + URI + terminator must stay within what a u8 can describe.
+	 */
+	if (uri == NULL || uri_len == 0 || uri_len > 0xff - 4)
+		return NULL;
+
+	/* @uri is not guaranteed to be NUL terminated: bound the print */
+	pr_debug("uri: %.*s, len: %zu\n", (int) uri_len, uri, uri_len);
+
+	sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
+	if (sdreq == NULL)
+		return NULL;
+
+	sdreq->tlv_len = uri_len + 3;
+
+	/* Do not send the terminating NUL over the air */
+	if (uri[uri_len - 1] == 0)
+		sdreq->tlv_len--;
+
+	sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL);
+	if (sdreq->tlv == NULL) {
+		kfree(sdreq);
+		return NULL;
+	}
+
+	sdreq->tlv[0] = LLCP_TLV_SDREQ;
+	sdreq->tlv[1] = sdreq->tlv_len - 2;
+	sdreq->tlv[2] = tid;
+
+	sdreq->tid = tid;
+	sdreq->uri = sdreq->tlv + 3;
+	memcpy(sdreq->uri, uri, uri_len);
+
+	/* Creation time, compared against the SDREQ timeout later on */
+	sdreq->time = jiffies;
+
+	INIT_HLIST_NODE(&sdreq->node);
+
+	return sdreq;
+}
+
+/*
+ * Release one SDP entry together with the TLV buffer it owns.
+ * @sdp is dereferenced unconditionally and must not be NULL.
+ */
+void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp)
+{
+	void *tlv_buf = sdp->tlv;
+
+	kfree(tlv_buf);
+	kfree(sdp);
+}
+
+/*
+ * Unlink and free every SDP entry queued on @head, leaving the list empty.
+ */
+void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
+{
+	struct nfc_llcp_sdp_tlv *entry;
+
+	/* Pop from the front until nothing is left */
+	while (!hlist_empty(head)) {
+		entry = hlist_entry(head->first, struct nfc_llcp_sdp_tlv,
+				    node);
+
+		hlist_del(&entry->node);
+
+		nfc_llcp_free_sdp_tlv(entry);
+	}
+}
+
int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
u8 *tlv_array, u16 tlv_array_len)
{
@@ -184,10 +266,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
switch (type) {
case LLCP_TLV_MIUX:
- sock->miu = llcp_tlv_miux(tlv) + 128;
+ sock->remote_miu = llcp_tlv_miux(tlv) + 128;
break;
case LLCP_TLV_RW:
- sock->rw = llcp_tlv_rw(tlv);
+ sock->remote_rw = llcp_tlv_rw(tlv);
break;
case LLCP_TLV_SN:
break;
@@ -200,7 +282,8 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
tlv += length + 2;
}
- pr_debug("sock %p rw %d miu %d\n", sock, sock->rw, sock->miu);
+ pr_debug("sock %p rw %d miu %d\n", sock,
+ sock->remote_rw, sock->remote_miu);
return 0;
}
@@ -318,9 +401,9 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
struct sk_buff *skb;
u8 *service_name_tlv = NULL, service_name_tlv_length;
u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length;
+ u8 *rw_tlv = NULL, rw_tlv_length, rw;
int err;
- u16 size = 0;
+ u16 size = 0, miux;
pr_debug("Sending CONNECT\n");
@@ -336,11 +419,16 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
size += service_name_tlv_length;
}
- miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0,
+ /* If the socket parameters are not set, use the local ones */
+ miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ?
+ local->miux : sock->miux;
+ rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw;
+
+ miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
&miux_tlv_length);
size += miux_tlv_length;
- rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length);
+ rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
size += rw_tlv_length;
pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
@@ -377,9 +465,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
struct nfc_llcp_local *local;
struct sk_buff *skb;
u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length;
+ u8 *rw_tlv = NULL, rw_tlv_length, rw;
int err;
- u16 size = 0;
+ u16 size = 0, miux;
pr_debug("Sending CC\n");
@@ -387,11 +475,16 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
if (local == NULL)
return -ENODEV;
- miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0,
+ /* If the socket parameters are not set, use the local ones */
+ miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ?
+ local->miux : sock->miux;
+ rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw;
+
+ miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
&miux_tlv_length);
size += miux_tlv_length;
- rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length);
+ rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
size += rw_tlv_length;
skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size);
@@ -416,48 +509,90 @@ error_tlv:
return err;
}
-int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap)
+static struct sk_buff *nfc_llcp_allocate_snl(struct nfc_llcp_local *local,
+ size_t tlv_length)
{
struct sk_buff *skb;
struct nfc_dev *dev;
- u8 *sdres_tlv = NULL, sdres_tlv_length, sdres[2];
u16 size = 0;
- pr_debug("Sending SNL tid 0x%x sap 0x%x\n", tid, sap);
-
if (local == NULL)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
dev = local->dev;
if (dev == NULL)
- return -ENODEV;
-
- sdres[0] = tid;
- sdres[1] = sap;
- sdres_tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, sdres, 0,
- &sdres_tlv_length);
- if (sdres_tlv == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENODEV);
size += LLCP_HEADER_SIZE;
size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
- size += sdres_tlv_length;
+ size += tlv_length;
skb = alloc_skb(size, GFP_KERNEL);
- if (skb == NULL) {
- kfree(sdres_tlv);
- return -ENOMEM;
- }
+ if (skb == NULL)
+ return ERR_PTR(-ENOMEM);
skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL);
- memcpy(skb_put(skb, sdres_tlv_length), sdres_tlv, sdres_tlv_length);
+ return skb;
+}
+
+int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
+ struct hlist_head *tlv_list, size_t tlvs_len)
+{
+ struct nfc_llcp_sdp_tlv *sdp;
+ struct hlist_node *n;
+ struct sk_buff *skb;
+
+ skb = nfc_llcp_allocate_snl(local, tlvs_len);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ hlist_for_each_entry_safe(sdp, n, tlv_list, node) {
+ memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len);
+
+ hlist_del(&sdp->node);
+
+ nfc_llcp_free_sdp_tlv(sdp);
+ }
skb_queue_tail(&local->tx_queue, skb);
- kfree(sdres_tlv);
+ return 0;
+}
+
+int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
+ struct hlist_head *tlv_list, size_t tlvs_len)
+{
+ struct nfc_llcp_sdp_tlv *sdreq;
+ struct hlist_node *n;
+ struct sk_buff *skb;
+
+ skb = nfc_llcp_allocate_snl(local, tlvs_len);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ mutex_lock(&local->sdreq_lock);
+
+ if (hlist_empty(&local->pending_sdreqs))
+ mod_timer(&local->sdreq_timer,
+ jiffies + msecs_to_jiffies(3 * local->remote_lto));
+
+ hlist_for_each_entry_safe(sdreq, n, tlv_list, node) {
+ pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri);
+
+ memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv,
+ sdreq->tlv_len);
+
+ hlist_del(&sdreq->node);
+
+ hlist_add_head(&sdreq->node, &local->pending_sdreqs);
+ }
+
+ mutex_unlock(&local->sdreq_lock);
+
+ skb_queue_tail(&local->tx_queue, skb);
return 0;
}
@@ -523,6 +658,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
struct nfc_llcp_local *local;
size_t frag_len = 0, remaining_len;
u8 *msg_data, *msg_ptr;
+ u16 remote_miu;
pr_debug("Send I frame len %zd\n", len);
@@ -532,8 +668,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
/* Remote is ready but has not acknowledged our frames */
if((sock->remote_ready &&
- skb_queue_len(&sock->tx_pending_queue) >= sock->rw &&
- skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) {
+ skb_queue_len(&sock->tx_pending_queue) >= sock->remote_rw &&
+ skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {
pr_err("Pending queue is full %d frames\n",
skb_queue_len(&sock->tx_pending_queue));
return -ENOBUFS;
@@ -541,7 +677,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
/* Remote is not ready and we've been queueing enough frames */
if ((!sock->remote_ready &&
- skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) {
+ skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {
pr_err("Tx queue is full %d frames\n",
skb_queue_len(&sock->tx_queue));
return -ENOBUFS;
@@ -559,9 +695,11 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
remaining_len = len;
msg_ptr = msg_data;
- while (remaining_len > 0) {
+ do {
+ remote_miu = sock->remote_miu > LLCP_MAX_MIU ?
+ local->remote_miu : sock->remote_miu;
- frag_len = min_t(size_t, sock->miu, remaining_len);
+ frag_len = min_t(size_t, remote_miu, remaining_len);
pr_debug("Fragment %zd bytes remaining %zd",
frag_len, remaining_len);
@@ -573,7 +711,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
skb_put(pdu, LLCP_SEQUENCE_SIZE);
- memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
+ if (likely(frag_len > 0))
+ memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
skb_queue_tail(&sock->tx_queue, pdu);
@@ -585,7 +724,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
remaining_len -= frag_len;
msg_ptr += frag_len;
- }
+ } while (remaining_len > 0);
kfree(msg_data);
@@ -599,6 +738,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
struct nfc_llcp_local *local;
size_t frag_len = 0, remaining_len;
u8 *msg_ptr, *msg_data;
+ u16 remote_miu;
int err;
pr_debug("Send UI frame len %zd\n", len);
@@ -619,9 +759,11 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
remaining_len = len;
msg_ptr = msg_data;
- while (remaining_len > 0) {
+ do {
+ remote_miu = sock->remote_miu > LLCP_MAX_MIU ?
+ local->remote_miu : sock->remote_miu;
- frag_len = min_t(size_t, sock->miu, remaining_len);
+ frag_len = min_t(size_t, remote_miu, remaining_len);
pr_debug("Fragment %zd bytes remaining %zd",
frag_len, remaining_len);
@@ -635,14 +777,15 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI);
- memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
+ if (likely(frag_len > 0))
+ memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
/* No need to check for the peer RW for UI frames */
skb_queue_tail(&local->tx_queue, pdu);
remaining_len -= frag_len;
msg_ptr += frag_len;
- }
+ } while (remaining_len > 0);
kfree(msg_data);
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp_core.c
index ee25f25f0cd6..158bdbf668cc 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp_core.c
@@ -24,13 +24,15 @@
#include <linux/list.h>
#include <linux/nfc.h>
-#include "../nfc.h"
+#include "nfc.h"
#include "llcp.h"
static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
static struct list_head llcp_devices;
+static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb);
+
void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *sk)
{
write_lock(&l->lock);
@@ -45,6 +47,12 @@ void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *sk)
write_unlock(&l->lock);
}
+/*
+ * Reset the remote link parameters cached on @sock.
+ *
+ * remote_miu is set one past LLCP_MAX_MIU; the frame senders treat any
+ * value above LLCP_MAX_MIU as "not set" and use local->remote_miu instead.
+ */
+void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock)
+{
+	sock->remote_miu = LLCP_MAX_MIU + 1;
+	sock->remote_rw = LLCP_DEFAULT_RW;
+}
+
static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)
{
struct nfc_llcp_local *local = sock->local;
@@ -68,7 +76,7 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)
}
}
-static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
+static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device,
int err)
{
struct sock *sk;
@@ -108,21 +116,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
bh_unlock_sock(accept_sk);
}
-
- if (listen == true) {
- bh_unlock_sock(sk);
- continue;
- }
- }
-
- /*
- * If we have a connection less socket bound, we keep it alive
- * if the device is still present.
- */
- if (sk->sk_state == LLCP_BOUND && sk->sk_type == SOCK_DGRAM &&
- listen == true) {
- bh_unlock_sock(sk);
- continue;
}
if (err)
@@ -137,11 +130,8 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
write_unlock(&local->sockets.lock);
- /*
- * If we want to keep the listening sockets alive,
- * we don't touch the RAW ones.
- */
- if (listen == true)
+ /* If we still have a device, we keep the RAW sockets alive */
+ if (device == true)
return;
write_lock(&local->raw_sockets.lock);
@@ -173,15 +163,18 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
return local;
}
-static void local_cleanup(struct nfc_llcp_local *local, bool listen)
+static void local_cleanup(struct nfc_llcp_local *local)
{
- nfc_llcp_socket_release(local, listen, ENXIO);
+ nfc_llcp_socket_release(local, false, ENXIO);
del_timer_sync(&local->link_timer);
skb_queue_purge(&local->tx_queue);
cancel_work_sync(&local->tx_work);
cancel_work_sync(&local->rx_work);
cancel_work_sync(&local->timeout_work);
kfree_skb(local->rx_pending);
+ del_timer_sync(&local->sdreq_timer);
+ cancel_work_sync(&local->sdreq_timeout_work);
+ nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
}
static void local_release(struct kref *ref)
@@ -191,7 +184,7 @@ static void local_release(struct kref *ref)
local = container_of(ref, struct nfc_llcp_local, ref);
list_del(&local->list);
- local_cleanup(local, false);
+ local_cleanup(local);
kfree(local);
}
@@ -259,6 +252,47 @@ static void nfc_llcp_symm_timer(unsigned long data)
schedule_work(&local->timeout_work);
}
+/*
+ * Work item scheduled by the SDREQ timer.
+ *
+ * Every pending request created at least 3 * remote_lto milliseconds ago
+ * is marked LLCP_SDP_UNBOUND, moved to a private list and handed to
+ * nfc_genl_llc_send_sdres(). The timer is re-armed while requests are
+ * still pending.
+ */
+static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local;
+	struct nfc_llcp_sdp_tlv *req;
+	struct hlist_node *tmp;
+	unsigned long deadline;
+	HLIST_HEAD(expired);
+
+	local = container_of(work, struct nfc_llcp_local, sdreq_timeout_work);
+
+	mutex_lock(&local->sdreq_lock);
+
+	/* Requests stamped at or before this point have timed out */
+	deadline = jiffies - msecs_to_jiffies(3 * local->remote_lto);
+
+	hlist_for_each_entry_safe(req, tmp, &local->pending_sdreqs, node) {
+		if (!time_after(req->time, deadline)) {
+			req->sap = LLCP_SDP_UNBOUND;
+
+			hlist_del(&req->node);
+
+			hlist_add_head(&req->node, &expired);
+		}
+	}
+
+	if (!hlist_empty(&local->pending_sdreqs))
+		mod_timer(&local->sdreq_timer,
+			  jiffies + msecs_to_jiffies(3 * local->remote_lto));
+
+	mutex_unlock(&local->sdreq_lock);
+
+	/* Report outside the lock; nothing to do if no request expired */
+	if (hlist_empty(&expired))
+		return;
+
+	nfc_genl_llc_send_sdres(local->dev, &expired);
+}
+
+/*
+ * SDREQ timer callback: @data is the struct nfc_llcp_local pointer stored
+ * in the timer; the actual timeout handling is deferred to a work item.
+ */
+static void nfc_llcp_sdreq_timer(unsigned long data)
+{
+	struct nfc_llcp_local *llcp_local;
+
+	llcp_local = (struct nfc_llcp_local *) data;
+	schedule_work(&llcp_local->sdreq_timeout_work);
+}
+
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
{
struct nfc_llcp_local *local, *n;
@@ -802,8 +836,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
ui_cb->dsap = dsap;
ui_cb->ssap = ssap;
- printk("%s %d %d\n", __func__, dsap, ssap);
-
pr_debug("%d %d\n", dsap, ssap);
/* We're looking for a bound socket, not a client one */
@@ -900,7 +932,9 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
new_sock = nfc_llcp_sock(new_sk);
new_sock->dev = local->dev;
new_sock->local = nfc_llcp_local_get(local);
- new_sock->miu = local->remote_miu;
+ new_sock->rw = sock->rw;
+ new_sock->miux = sock->miux;
+ new_sock->remote_miu = local->remote_miu;
new_sock->nfc_protocol = sock->nfc_protocol;
new_sock->dsap = ssap;
new_sock->target_idx = local->target_idx;
@@ -954,11 +988,11 @@ int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock)
pr_debug("Remote ready %d tx queue len %d remote rw %d",
sock->remote_ready, skb_queue_len(&sock->tx_pending_queue),
- sock->rw);
+ sock->remote_rw);
/* Try to queue some I frames for transmission */
while (sock->remote_ready &&
- skb_queue_len(&sock->tx_pending_queue) < sock->rw) {
+ skb_queue_len(&sock->tx_pending_queue) < sock->remote_rw) {
struct sk_buff *pdu;
pdu = skb_dequeue(&sock->tx_queue);
@@ -1072,6 +1106,12 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
dsap = nfc_llcp_dsap(skb);
ssap = nfc_llcp_ssap(skb);
+ if ((dsap == 0) && (ssap == 0)) {
+ pr_debug("Connection termination");
+ nfc_dep_link_down(local->dev);
+ return;
+ }
+
llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
if (llcp_sock == NULL) {
nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
@@ -1178,6 +1218,10 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
u16 tlv_len, offset;
char *service_name;
size_t service_name_len;
+ struct nfc_llcp_sdp_tlv *sdp;
+ HLIST_HEAD(llc_sdres_list);
+ size_t sdres_tlvs_len;
+ HLIST_HEAD(nl_sdres_list);
dsap = nfc_llcp_dsap(skb);
ssap = nfc_llcp_ssap(skb);
@@ -1192,6 +1236,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
tlv = &skb->data[LLCP_HEADER_SIZE];
tlv_len = skb->len - LLCP_HEADER_SIZE;
offset = 0;
+ sdres_tlvs_len = 0;
while (offset < tlv_len) {
type = tlv[0];
@@ -1209,14 +1254,14 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
!strncmp(service_name, "urn:nfc:sn:sdp",
service_name_len)) {
sap = 1;
- goto send_snl;
+ goto add_snl;
}
llcp_sock = nfc_llcp_sock_from_sn(local, service_name,
service_name_len);
if (!llcp_sock) {
sap = 0;
- goto send_snl;
+ goto add_snl;
}
/*
@@ -1233,7 +1278,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
if (sap == LLCP_SAP_MAX) {
sap = 0;
- goto send_snl;
+ goto add_snl;
}
client_count =
@@ -1250,8 +1295,37 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
pr_debug("%p %d\n", llcp_sock, sap);
-send_snl:
- nfc_llcp_send_snl(local, tid, sap);
+add_snl:
+ sdp = nfc_llcp_build_sdres_tlv(tid, sap);
+ if (sdp == NULL)
+ goto exit;
+
+ sdres_tlvs_len += sdp->tlv_len;
+ hlist_add_head(&sdp->node, &llc_sdres_list);
+ break;
+
+ case LLCP_TLV_SDRES:
+ mutex_lock(&local->sdreq_lock);
+
+ pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]);
+
+ hlist_for_each_entry(sdp, &local->pending_sdreqs, node) {
+ if (sdp->tid != tlv[2])
+ continue;
+
+ sdp->sap = tlv[3];
+
+ pr_debug("Found: uri=%s, sap=%d\n",
+ sdp->uri, sdp->sap);
+
+ hlist_del(&sdp->node);
+
+ hlist_add_head(&sdp->node, &nl_sdres_list);
+
+ break;
+ }
+
+ mutex_unlock(&local->sdreq_lock);
break;
default:
@@ -1262,21 +1336,63 @@ send_snl:
offset += length + 2;
tlv += length + 2;
}
+
+exit:
+ if (!hlist_empty(&nl_sdres_list))
+ nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
+
+ if (!hlist_empty(&llc_sdres_list))
+ nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len);
}
-static void nfc_llcp_rx_work(struct work_struct *work)
+static void nfc_llcp_recv_agf(struct nfc_llcp_local *local, struct sk_buff *skb)
{
- struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
- rx_work);
- u8 dsap, ssap, ptype;
- struct sk_buff *skb;
+ u8 ptype;
+ u16 pdu_len;
+ struct sk_buff *new_skb;
- skb = local->rx_pending;
- if (skb == NULL) {
- pr_debug("No pending SKB\n");
+ if (skb->len <= LLCP_HEADER_SIZE) {
+ pr_err("Malformed AGF PDU\n");
return;
}
+ skb_pull(skb, LLCP_HEADER_SIZE);
+
+ while (skb->len > LLCP_AGF_PDU_HEADER_SIZE) {
+ pdu_len = skb->data[0] << 8 | skb->data[1];
+
+ skb_pull(skb, LLCP_AGF_PDU_HEADER_SIZE);
+
+ if (pdu_len < LLCP_HEADER_SIZE || pdu_len > skb->len) {
+ pr_err("Malformed AGF PDU\n");
+ return;
+ }
+
+ ptype = nfc_llcp_ptype(skb);
+
+ if (ptype == LLCP_PDU_SYMM || ptype == LLCP_PDU_AGF)
+ goto next;
+
+ new_skb = nfc_alloc_recv_skb(pdu_len, GFP_KERNEL);
+ if (new_skb == NULL) {
+ pr_err("Could not allocate PDU\n");
+ return;
+ }
+
+ memcpy(skb_put(new_skb, pdu_len), skb->data, pdu_len);
+
+ nfc_llcp_rx_skb(local, new_skb);
+
+ kfree_skb(new_skb);
+next:
+ skb_pull(skb, pdu_len);
+ }
+}
+
+static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb)
+{
+ u8 dsap, ssap, ptype;
+
ptype = nfc_llcp_ptype(skb);
dsap = nfc_llcp_dsap(skb);
ssap = nfc_llcp_ssap(skb);
@@ -1287,10 +1403,6 @@ static void nfc_llcp_rx_work(struct work_struct *work)
print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET,
16, 1, skb->data, skb->len, true);
- __net_timestamp(skb);
-
- nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
-
switch (ptype) {
case LLCP_PDU_SYMM:
pr_debug("SYMM\n");
@@ -1333,8 +1445,31 @@ static void nfc_llcp_rx_work(struct work_struct *work)
nfc_llcp_recv_hdlc(local, skb);
break;
+ case LLCP_PDU_AGF:
+ pr_debug("AGF frame\n");
+ nfc_llcp_recv_agf(local, skb);
+ break;
+ }
+}
+
+static void nfc_llcp_rx_work(struct work_struct *work)
+{
+ struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+ rx_work);
+ struct sk_buff *skb;
+
+ skb = local->rx_pending;
+ if (skb == NULL) {
+ pr_debug("No pending SKB\n");
+ return;
}
+ __net_timestamp(skb);
+
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+
+ nfc_llcp_rx_skb(local, skb);
+
schedule_work(&local->tx_work);
kfree_skb(local->rx_pending);
local->rx_pending = NULL;
@@ -1381,6 +1516,9 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev)
if (local == NULL)
return;
+ local->remote_miu = LLCP_DEFAULT_MIU;
+ local->remote_lto = LLCP_DEFAULT_LTO;
+
/* Close and purge all existing sockets */
nfc_llcp_socket_release(local, true, 0);
}
@@ -1447,6 +1585,13 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
local->remote_miu = LLCP_DEFAULT_MIU;
local->remote_lto = LLCP_DEFAULT_LTO;
+ mutex_init(&local->sdreq_lock);
+ INIT_HLIST_HEAD(&local->pending_sdreqs);
+ init_timer(&local->sdreq_timer);
+ local->sdreq_timer.data = (unsigned long) local;
+ local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+ INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
+
list_add(&local->list, &llcp_devices);
return 0;
@@ -1461,7 +1606,7 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev)
return;
}
- local_cleanup(local, false);
+ local_cleanup(local);
nfc_llcp_local_put(local);
}
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp_sock.c
index 6c94447ec414..380253eccb74 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp_sock.c
@@ -24,7 +24,7 @@
#include <linux/module.h>
#include <linux/nfc.h>
-#include "../nfc.h"
+#include "nfc.h"
#include "llcp.h"
static int sock_wait_state(struct sock *sk, int state, unsigned long timeo)
@@ -223,6 +223,156 @@ error:
return ret;
}
+/*
+ * setsockopt() handler for LLCP sockets (level SOL_NFC).
+ *
+ * NFC_LLCP_RW and NFC_LLCP_MIUX take a u32 and store the local receive
+ * window / MIU extension on the socket. Both are refused with -EINVAL once
+ * the socket is bound, listening or connected, when the value exceeds
+ * LLCP_MAX_RW / LLCP_MAX_MIUX, or when @optlen is too short to hold a u32.
+ *
+ * Returns 0 on success, -ENOPROTOOPT for an unknown level or option,
+ * -EFAULT if @optval cannot be read, -EINVAL as described above.
+ */
+static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	u32 opt;
+	int err = 0;
+
+	pr_debug("%p optname %d\n", sk, optname);
+
+	if (level != SOL_NFC)
+		return -ENOPROTOOPT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case NFC_LLCP_RW:
+	case NFC_LLCP_MIUX:
+		if (sk->sk_state == LLCP_CONNECTED ||
+		    sk->sk_state == LLCP_BOUND ||
+		    sk->sk_state == LLCP_LISTEN) {
+			err = -EINVAL;
+			break;
+		}
+
+		/* get_user() below reads a full u32 from user space */
+		if (optlen < sizeof(u32)) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (optname == NFC_LLCP_RW) {
+			if (opt > LLCP_MAX_RW) {
+				err = -EINVAL;
+				break;
+			}
+
+			llcp_sock->rw = (u8) opt;
+		} else {
+			if (opt > LLCP_MAX_MIUX) {
+				err = -EINVAL;
+				break;
+			}
+
+			/* miux is kept in big endian form on the socket */
+			llcp_sock->miux = cpu_to_be16((u16) opt);
+		}
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+
+	pr_debug("%p rw %d miux %d\n", llcp_sock,
+		 llcp_sock->rw, llcp_sock->miux);
+
+	return err;
+}
+
+/*
+ * getsockopt() handler for LLCP sockets (level SOL_NFC).
+ *
+ * Every supported option is written to @optval as a u32:
+ *   NFC_LLCP_RW / NFC_LLCP_MIUX - socket value, or the value from the
+ *                                 nfc_llcp_local when the socket one is
+ *                                 above LLCP_MAX_RW / LLCP_MAX_MIUX
+ *   NFC_LLCP_REMOTE_MIU         - socket value, or local->remote_miu when
+ *                                 the socket one is above LLCP_MAX_MIU
+ *   NFC_LLCP_REMOTE_LTO         - local->remote_lto divided by 10
+ *   NFC_LLCP_REMOTE_RW          - socket remote_rw
+ *
+ * The length stored back through @optlen is the caller's length capped at
+ * sizeof(u32). Returns 0, -ENOPROTOOPT, -ENODEV (socket has no local) or
+ * -EFAULT.
+ */
+static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct nfc_llcp_local *local;
+	u16 miux, remote_miu;
+	int len, err = 0;
+	u32 val = 0;
+	u8 rw;
+
+	pr_debug("%p optname %d\n", sk, optname);
+
+	if (level != SOL_NFC)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	local = llcp_sock->local;
+	if (!local)
+		return -ENODEV;
+
+	len = min_t(u32, len, sizeof(u32));
+
+	lock_sock(sk);
+
+	/* Pick the value first, copy it out once below */
+	switch (optname) {
+	case NFC_LLCP_RW:
+		rw = llcp_sock->rw;
+		if (llcp_sock->rw > LLCP_MAX_RW)
+			rw = local->rw;
+		val = rw;
+		break;
+
+	case NFC_LLCP_MIUX:
+		miux = be16_to_cpu(llcp_sock->miux);
+		if (be16_to_cpu(llcp_sock->miux) > LLCP_MAX_MIUX)
+			miux = be16_to_cpu(local->miux);
+		val = miux;
+		break;
+
+	case NFC_LLCP_REMOTE_MIU:
+		remote_miu = llcp_sock->remote_miu;
+		if (llcp_sock->remote_miu > LLCP_MAX_MIU)
+			remote_miu = local->remote_miu;
+		val = remote_miu;
+		break;
+
+	case NFC_LLCP_REMOTE_LTO:
+		val = local->remote_lto / 10;
+		break;
+
+	case NFC_LLCP_REMOTE_RW:
+		val = llcp_sock->remote_rw;
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	if (!err && put_user(val, (u32 __user *) optval))
+		err = -EFAULT;
+
+	release_sock(sk);
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return err;
+}
+
void nfc_llcp_accept_unlink(struct sock *sk)
{
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -358,12 +508,13 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx,
llcp_sock->dsap, llcp_sock->ssap);
- uaddr->sa_family = AF_NFC;
-
+ memset(llcp_addr, 0, sizeof(*llcp_addr));
*len = sizeof(struct sockaddr_nfc_llcp);
+ llcp_addr->sa_family = AF_NFC;
llcp_addr->dev_idx = llcp_sock->dev->idx;
llcp_addr->target_idx = llcp_sock->target_idx;
+ llcp_addr->nfc_protocol = llcp_sock->nfc_protocol;
llcp_addr->dsap = llcp_sock->dsap;
llcp_addr->ssap = llcp_sock->ssap;
llcp_addr->service_name_len = llcp_sock->service_name_len;
@@ -405,7 +556,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
return llcp_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
@@ -543,7 +695,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->dev = dev;
llcp_sock->local = nfc_llcp_local_get(local);
- llcp_sock->miu = llcp_sock->local->remote_miu;
+ llcp_sock->remote_miu = llcp_sock->local->remote_miu;
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
ret = -ENOMEM;
@@ -740,8 +892,8 @@ static const struct proto_ops llcp_sock_ops = {
.ioctl = sock_no_ioctl,
.listen = llcp_sock_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
+ .setsockopt = nfc_llcp_setsockopt,
+ .getsockopt = nfc_llcp_getsockopt,
.sendmsg = llcp_sock_sendmsg,
.recvmsg = llcp_sock_recvmsg,
.mmap = sock_no_mmap,
@@ -805,12 +957,13 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)
llcp_sock->ssap = 0;
llcp_sock->dsap = LLCP_SAP_SDP;
- llcp_sock->rw = LLCP_DEFAULT_RW;
- llcp_sock->miu = LLCP_DEFAULT_MIU;
+ llcp_sock->rw = LLCP_MAX_RW + 1;
+ llcp_sock->miux = cpu_to_be16(LLCP_MAX_MIUX + 1);
llcp_sock->send_n = llcp_sock->send_ack_n = 0;
llcp_sock->recv_n = llcp_sock->recv_ack_n = 0;
llcp_sock->remote_ready = 1;
llcp_sock->reserved_ssap = LLCP_SAP_MAX;
+ nfc_llcp_socket_remote_param_init(llcp_sock);
skb_queue_head_init(&llcp_sock->tx_queue);
skb_queue_head_init(&llcp_sock->tx_pending_queue);
INIT_LIST_HEAD(&llcp_sock->accept_queue);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 504b883439f1..f0c4d61f37c0 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -28,8 +28,7 @@
#include <linux/slab.h>
#include "nfc.h"
-
-#include "llcp/llcp.h"
+#include "llcp.h"
static struct genl_multicast_group nfc_genl_event_mcgrp = {
.name = NFC_GENL_MCAST_EVENT_NAME,
@@ -53,6 +52,15 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },
[NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 },
[NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 },
+ [NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 },
+ [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 },
+ [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 },
+ [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
+ [NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+ [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
};
static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
@@ -348,6 +356,74 @@ free_msg:
return -EMSGSIZE;
}
+/*
+ * Multicast an NFC_EVENT_LLC_SDRES event describing every entry of
+ * @sdres_list for device @dev.
+ *
+ * The message carries NFC_ATTR_DEVICE_INDEX and a nested NFC_ATTR_LLC_SDP
+ * attribute holding one nested attribute per entry (numbered from 1) with
+ * its NFC_SDP_ATTR_SAP and NFC_SDP_ATTR_URI.
+ *
+ * Ownership: @sdres_list is always consumed. Entries are freed as they are
+ * serialized, and whatever is left is freed on every failure path,
+ * including failure to allocate the message itself.
+ *
+ * Returns the genlmsg_multicast() result, -ENOMEM or -EMSGSIZE.
+ */
+int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
+{
+	struct sk_buff *msg;
+	struct nlattr *sdp_attr, *uri_attr;
+	struct nfc_llcp_sdp_tlv *sdres;
+	struct hlist_node *n;
+	void *hdr;
+	int rc = -EMSGSIZE;
+	int i;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		/* Callers do not free the list themselves: do not leak it */
+		rc = -ENOMEM;
+		goto free_list;
+	}
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+			  NFC_EVENT_LLC_SDRES);
+	if (!hdr)
+		goto free_msg;
+
+	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
+		goto nla_put_failure;
+
+	sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
+	if (sdp_attr == NULL) {
+		rc = -ENOMEM;
+		goto nla_put_failure;
+	}
+
+	i = 1;
+	hlist_for_each_entry_safe(sdres, n, sdres_list, node) {
+		pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap);
+
+		uri_attr = nla_nest_start(msg, i++);
+		if (uri_attr == NULL) {
+			rc = -ENOMEM;
+			goto nla_put_failure;
+		}
+
+		if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap))
+			goto nla_put_failure;
+
+		if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri))
+			goto nla_put_failure;
+
+		nla_nest_end(msg, uri_attr);
+
+		/* Serialized: the entry is no longer needed */
+		hlist_del(&sdres->node);
+
+		nfc_llcp_free_sdp_tlv(sdres);
+	}
+
+	nla_nest_end(msg, sdp_attr);
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+
+free_msg:
+	nlmsg_free(msg);
+
+free_list:
+	nfc_llcp_free_sdp_tlv_list(sdres_list);
+
+	return rc;
+}
+
static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
u32 portid, u32 seq,
struct netlink_callback *cb,
@@ -859,6 +935,96 @@ exit:
return rc;
}
+/*
+ * NFC_CMD_LLC_SDREQ handler: build one SDREQ TLV per nested
+ * NFC_ATTR_LLC_SDP entry that carries a non-empty NFC_SDP_ATTR_URI, then
+ * pass the whole batch to nfc_llcp_send_snl_sdreq().
+ *
+ * Returns 0 on success, -EINVAL for missing/invalid attributes or when no
+ * usable URI was supplied, -ENODEV if the device or its LLCP local cannot
+ * be found, -ENOLINK if the DEP link is down, -ENOMEM if a TLV cannot be
+ * built, or the nfc_llcp_send_snl_sdreq() error.
+ *
+ * Cleanup: the device is unlocked and its reference dropped exactly once,
+ * and only after it was actually obtained. Requests built before a failure
+ * are freed, since nothing else owns them until the send succeeds.
+ */
+static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1];
+	u32 idx;
+	u8 tid;
+	char *uri;
+	int rc = 0, rem;
+	size_t uri_len, tlvs_len;
+	struct nfc_llcp_sdp_tlv *sdreq;
+	HLIST_HEAD(sdreq_list);
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_LLC_SDP])
+		return -EINVAL;
+
+	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+	/* No device means no lock and no reference: nothing to undo */
+	dev = nfc_get_device(idx);
+	if (!dev)
+		return -ENODEV;
+
+	device_lock(&dev->dev);
+
+	if (dev->dep_link_up == false) {
+		rc = -ENOLINK;
+		goto exit;
+	}
+
+	/* The device reference is dropped once, at the exit label */
+	local = nfc_llcp_find_local(dev);
+	if (!local) {
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	tlvs_len = 0;
+
+	nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) {
+		rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr,
+				      nfc_sdp_genl_policy);
+
+		if (rc != 0) {
+			rc = -EINVAL;
+			goto exit;
+		}
+
+		/* Entries without a usable URI are silently skipped */
+		if (!sdp_attrs[NFC_SDP_ATTR_URI])
+			continue;
+
+		uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]);
+		if (uri_len == 0)
+			continue;
+
+		uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]);
+		if (uri == NULL || *uri == 0)
+			continue;
+
+		tid = local->sdreq_next_tid++;
+
+		sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len);
+		if (sdreq == NULL) {
+			rc = -ENOMEM;
+			goto exit;
+		}
+
+		tlvs_len += sdreq->tlv_len;
+
+		hlist_add_head(&sdreq->node, &sdreq_list);
+	}
+
+	if (hlist_empty(&sdreq_list)) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+	rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len);
+exit:
+	/* On failure the requests never reached the pending list */
+	if (rc)
+		nfc_llcp_free_sdp_tlv_list(&sdreq_list);
+
+	device_unlock(&dev->dev);
+
+	nfc_put_device(dev);
+
+	return rc;
+}
+
static struct genl_ops nfc_genl_ops[] = {
{
.cmd = NFC_CMD_GET_DEVICE,
@@ -913,6 +1079,11 @@ static struct genl_ops nfc_genl_ops[] = {
.doit = nfc_genl_llc_set_params,
.policy = nfc_genl_policy,
},
+ {
+ .cmd = NFC_CMD_LLC_SDREQ,
+ .doit = nfc_genl_llc_sdreq,
+ .policy = nfc_genl_policy,
+ },
};
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 87d914d2876a..afa1f84ba040 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -46,7 +46,7 @@ struct nfc_rawsock {
#define to_rawsock_sk(_tx_work) \
((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
-#ifdef CONFIG_NFC_LLCP
+struct nfc_llcp_sdp_tlv;
void nfc_llcp_mac_is_down(struct nfc_dev *dev);
void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
@@ -59,60 +59,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
int __init nfc_llcp_init(void);
void nfc_llcp_exit(void);
-
-#else
-
-static inline void nfc_llcp_mac_is_down(struct nfc_dev *dev)
-{
-}
-
-static inline void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
- u8 comm_mode, u8 rf_mode)
-{
-}
-
-static inline int nfc_llcp_register_device(struct nfc_dev *dev)
-{
- return 0;
-}
-
-static inline void nfc_llcp_unregister_device(struct nfc_dev *dev)
-{
-}
-
-static inline int nfc_llcp_set_remote_gb(struct nfc_dev *dev,
- u8 *gb, u8 gb_len)
-{
- return 0;
-}
-
-static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *gb_len)
-{
- *gb_len = 0;
- return NULL;
-}
-
-static inline int nfc_llcp_data_received(struct nfc_dev *dev,
- struct sk_buff *skb)
-{
- return 0;
-}
-
-static inline struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
-{
- return NULL;
-}
-
-static inline int nfc_llcp_init(void)
-{
- return 0;
-}
-
-static inline void nfc_llcp_exit(void)
-{
-}
-
-#endif
+void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
+void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head);
int __init rawsock_init(void);
void rawsock_exit(void);
@@ -144,6 +92,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev);
int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol);
int nfc_genl_tm_deactivated(struct nfc_dev *dev);
+int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list);
+
struct nfc_dev *nfc_get_device(unsigned int idx);
static inline void nfc_put_device(struct nfc_dev *dev)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index d4d5363c7ba7..894b6cbdd929 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb)
if (unlikely(err))
return err;
- __vlan_hwaccel_put_tag(skb, ntohs(tci));
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
return 0;
}
@@ -110,7 +110,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
/* push down current VLAN tag */
current_tag = vlan_tx_tag_get(skb);
- if (!__vlan_put_tag(skb, current_tag))
+ if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
return -ENOMEM;
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -118,7 +118,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
+ (2 * ETH_ALEN), VLAN_HLEN, 0));
}
- __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+ __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
return 0;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6980c3e6f066..d12d6b8b5e8b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
@@ -55,39 +56,61 @@
#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"
+#include "vport-netdev.h"
-/**
- * struct ovs_net - Per net-namespace data for ovs.
- * @dps: List of datapaths to enable dumping them all out.
- * Protected by genl_mutex.
- */
-struct ovs_net {
- struct list_head dps;
-};
-
-static int ovs_net_id __read_mostly;
#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+int ovs_net_id __read_mostly;
+
+/*
+ * Forward @skb to genl_notify() for group @grp, taking the network
+ * namespace, sender port id and netlink header from @info.
+ */
+static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
+		       struct genl_multicast_group *grp)
+{
+	struct net *net = genl_info_net(info);
+
+	genl_notify(skb, net, info->snd_portid, grp->id, info->nlhdr,
+		    GFP_KERNEL);
+}
+
/**
* DOC: Locking:
*
- * Writes to device state (add/remove datapath, port, set operations on vports,
- * etc.) are protected by RTNL.
- *
- * Writes to other state (flow table modifications, set miscellaneous datapath
- * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
- * genl_mutex.
+ * All writes, e.g. writes to device state (add/remove datapath, port, set
+ * operations on vports, etc.) and writes to other state (flow table
+ * modifications, set miscellaneous datapath parameters, etc.), are protected
+ * by ovs_mutex (taken with ovs_lock()).
*
* Reads are protected by RCU.
*
* There are a few special cases (mostly stats) that have their own
* synchronization but they nest under all of above and don't interact with
* each other.
+ *
+ * The RTNL lock nests inside ovs_mutex.
*/
+static DEFINE_MUTEX(ovs_mutex);
+
+/* Acquire ovs_mutex; pair with ovs_unlock(). */
+void ovs_lock(void)
+{
+ mutex_lock(&ovs_mutex);
+}
+
+/* Release ovs_mutex previously taken with ovs_lock(). */
+void ovs_unlock(void)
+{
+ mutex_unlock(&ovs_mutex);
+}
+
+#ifdef CONFIG_LOCKDEP
+/*
+ * Lockdep query for ovs_mutex. While debug_locks is set this returns
+ * lockdep_is_held(&ovs_mutex); once it is clear the function returns 1.
+ */
+int lockdep_ovsl_is_held(void)
+{
+	return debug_locks ? lockdep_is_held(&ovs_mutex) : 1;
+}
+#endif
+
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
const struct dp_upcall_info *);
@@ -95,7 +118,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
struct sk_buff *,
const struct dp_upcall_info *);
-/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
struct datapath *dp = NULL;
@@ -113,10 +136,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
return dp;
}
-/* Must be called with rcu_read_lock or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
- struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
+ struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
return vport->ops->get_name(vport);
}
@@ -129,7 +152,7 @@ static int get_dpifindex(struct datapath *dp)
local = ovs_vport_rcu(dp, OVSP_LOCAL);
if (local)
- ifindex = local->ops->get_ifindex(local);
+ ifindex = netdev_vport_priv(local)->dev->ifindex;
else
ifindex = 0;
@@ -168,7 +191,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
return NULL;
}
-/* Called with RTNL lock and genl_lock. */
+/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
struct vport *vport;
@@ -180,14 +203,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
hlist_add_head_rcu(&vport->dp_hash_node, head);
}
-
return vport;
}
-/* Called with RTNL lock. */
void ovs_dp_detach_port(struct vport *p)
{
- ASSERT_RTNL();
+ ASSERT_OVSL();
/* First drop references to device. */
hlist_del_rcu(&p->dp_hash_node);
@@ -250,7 +271,8 @@ static struct genl_family dp_packet_genl_family = {
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
.maxattr = OVS_PACKET_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
@@ -337,6 +359,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
return err;
}
+/*
+ * Sum of nla_total_size() over the flow key attributes listed below; used
+ * to size the nested key attribute in upcall and flow reply messages.
+ */
+static size_t key_attr_size(void)
+{
+	size_t total = 0;
+
+	total += nla_total_size(4);	/* OVS_KEY_ATTR_PRIORITY */
+	total += nla_total_size(4);	/* OVS_KEY_ATTR_IN_PORT */
+	total += nla_total_size(4);	/* OVS_KEY_ATTR_SKB_MARK */
+	total += nla_total_size(12);	/* OVS_KEY_ATTR_ETHERNET */
+	total += nla_total_size(2);	/* OVS_KEY_ATTR_ETHERTYPE */
+	total += nla_total_size(4);	/* OVS_KEY_ATTR_8021Q */
+	total += nla_total_size(0);	/* OVS_KEY_ATTR_ENCAP */
+	total += nla_total_size(2);	/* OVS_KEY_ATTR_ETHERTYPE */
+	total += nla_total_size(40);	/* OVS_KEY_ATTR_IPV6 */
+	total += nla_total_size(2);	/* OVS_KEY_ATTR_ICMPV6 */
+	total += nla_total_size(28);	/* OVS_KEY_ATTR_ND */
+
+	return total;
+}
+
+/*
+ * Size of the genetlink message allocated for an upcall carrying @skb: the
+ * aligned ovs_header, the packet data, the nested flow key and, when
+ * @userdata is non-NULL, room for the userdata attribute.
+ */
+static size_t upcall_msg_size(const struct sk_buff *skb,
+			      const struct nlattr *userdata)
+{
+	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
+
+	len += nla_total_size(skb->len);	/* OVS_PACKET_ATTR_PACKET */
+	len += nla_total_size(key_attr_size());	/* OVS_PACKET_ATTR_KEY */
+
+	if (userdata)				/* OVS_PACKET_ATTR_USERDATA */
+		len += NLA_ALIGN(userdata->nla_len);
+
+	return len;
+}
+
static int queue_userspace_packet(struct net *net, int dp_ifindex,
struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
@@ -345,7 +396,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
struct sk_buff *nskb = NULL;
struct sk_buff *user_skb; /* to be queued to userspace */
struct nlattr *nla;
- unsigned int len;
int err;
if (vlan_tx_tag_present(skb)) {
@@ -353,7 +403,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
if (!nskb)
return -ENOMEM;
- nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
+ nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
if (!nskb)
return -ENOMEM;
@@ -366,13 +416,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
goto out;
}
- len = sizeof(struct ovs_header);
- len += nla_total_size(skb->len);
- len += nla_total_size(FLOW_BUFSIZE);
- if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
- len += nla_total_size(8);
-
- user_skb = genlmsg_new(len, GFP_ATOMIC);
+ user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
goto out;
@@ -387,8 +431,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
- nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
- nla_get_u64(upcall_info->userdata));
+ __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
+ nla_len(upcall_info->userdata),
+ nla_data(upcall_info->userdata));
nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
@@ -402,13 +447,13 @@ out:
return err;
}
-/* Called with genl_mutex. */
+/* Called with ovs_mutex. */
static int flush_flows(struct datapath *dp)
{
struct flow_table *old_table;
struct flow_table *new_table;
- old_table = genl_dereference(dp->table);
+ old_table = ovsl_dereference(dp->table);
new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
if (!new_table)
return -ENOMEM;
@@ -544,7 +589,7 @@ static int validate_userspace(const struct nlattr *attr)
{
static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
- [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
+ [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
};
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
@@ -661,8 +706,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
err = -EINVAL;
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
- !a[OVS_PACKET_ATTR_ACTIONS] ||
- nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
+ !a[OVS_PACKET_ATTR_ACTIONS])
goto err;
len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
@@ -672,7 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err;
skb_reserve(packet, NET_IP_ALIGN);
- memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
+ nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
skb_reset_mac_header(packet);
eth = eth_hdr(packet);
@@ -680,7 +724,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
/* Normally, setting the skb 'protocol' field would be handled by a
* call to eth_type_trans(), but it assumes there's a sending
* device, which we may not have. */
- if (ntohs(eth->h_proto) >= 1536)
+ if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
packet->protocol = eth->h_proto;
else
packet->protocol = htons(ETH_P_802_2);
@@ -743,7 +787,7 @@ err:
}
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
- [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
+ [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};
@@ -759,7 +803,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
int i;
- struct flow_table *table = genl_dereference(dp->table);
+ struct flow_table *table = ovsl_dereference(dp->table);
stats->n_flows = ovs_flow_tbl_count(table);
@@ -794,14 +838,25 @@ static struct genl_family dp_flow_genl_family = {
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
.maxattr = OVS_FLOW_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
static struct genl_multicast_group ovs_dp_flow_multicast_group = {
.name = OVS_FLOW_MCGROUP
};
-/* Called with genl_lock. */
+/*
+ * Size of the genetlink message allocated for a flow reply whose actions
+ * are @acts: the aligned ovs_header plus one attribute each for the key,
+ * stats, TCP flags, last-used time and the actions themselves.
+ */
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+{
+	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
+
+	len += nla_total_size(key_attr_size());		/* OVS_FLOW_ATTR_KEY */
+	len += nla_total_size(sizeof(struct ovs_flow_stats)); /* OVS_FLOW_ATTR_STATS */
+	len += nla_total_size(1);			/* OVS_FLOW_ATTR_TCP_FLAGS */
+	len += nla_total_size(8);			/* OVS_FLOW_ATTR_USED */
+	len += nla_total_size(acts->actions_len);	/* OVS_FLOW_ATTR_ACTIONS */
+
+	return len;
+}
+
+/* Called with ovs_mutex. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
@@ -815,8 +870,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
u8 tcp_flags;
int err;
- sf_acts = rcu_dereference_protected(flow->sf_acts,
- lockdep_genl_is_held());
+ sf_acts = ovsl_dereference(flow->sf_acts);
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
if (!ovs_header)
@@ -879,25 +933,10 @@ error:
static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
const struct sw_flow_actions *sf_acts;
- int len;
- sf_acts = rcu_dereference_protected(flow->sf_acts,
- lockdep_genl_is_held());
+ sf_acts = ovsl_dereference(flow->sf_acts);
- /* OVS_FLOW_ATTR_KEY */
- len = nla_total_size(FLOW_BUFSIZE);
- /* OVS_FLOW_ATTR_ACTIONS */
- len += nla_total_size(sf_acts->actions_len);
- /* OVS_FLOW_ATTR_STATS */
- len += nla_total_size(sizeof(struct ovs_flow_stats));
- /* OVS_FLOW_ATTR_TCP_FLAGS */
- len += nla_total_size(1);
- /* OVS_FLOW_ATTR_USED */
- len += nla_total_size(8);
-
- len += NLMSG_ALIGN(sizeof(struct ovs_header));
-
- return genlmsg_new(len, GFP_KERNEL);
+ return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
}
static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
@@ -946,12 +985,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;
}
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
error = -ENODEV;
if (!dp)
- goto error;
+ goto err_unlock_ovs;
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
if (!flow) {
struct sw_flow_actions *acts;
@@ -959,7 +999,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
- goto error;
+ goto err_unlock_ovs;
/* Expand table, if necessary, to make room. */
if (ovs_flow_tbl_need_to_expand(table)) {
@@ -969,7 +1009,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
ovs_flow_tbl_deferred_destroy(table);
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
}
}
@@ -977,7 +1017,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
flow = ovs_flow_alloc();
if (IS_ERR(flow)) {
error = PTR_ERR(flow);
- goto error;
+ goto err_unlock_ovs;
}
flow->key = key;
clear_stats(flow);
@@ -1010,11 +1050,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
error = -EEXIST;
if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
- goto error;
+ goto err_unlock_ovs;
/* Update actions. */
- old_acts = rcu_dereference_protected(flow->sf_acts,
- lockdep_genl_is_held());
+ old_acts = ovsl_dereference(flow->sf_acts);
acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
if (acts_attrs &&
(old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1025,7 +1064,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
new_acts = ovs_flow_actions_alloc(acts_attrs);
error = PTR_ERR(new_acts);
if (IS_ERR(new_acts))
- goto error;
+ goto err_unlock_ovs;
rcu_assign_pointer(flow->sf_acts, new_acts);
ovs_flow_deferred_free_acts(old_acts);
@@ -1041,11 +1080,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&flow->lock);
}
}
+ ovs_unlock();
if (!IS_ERR(reply))
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_flow_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
else
netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
@@ -1053,6 +1091,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
error_free_flow:
ovs_flow_free(flow);
+err_unlock_ovs:
+ ovs_unlock();
error:
return error;
}
@@ -1075,21 +1115,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp)
- return -ENODEV;
+ if (!dp) {
+ err = -ENODEV;
+ goto unlock;
+ }
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
- if (!flow)
- return -ENOENT;
+ if (!flow) {
+ err = -ENOENT;
+ goto unlock;
+ }
reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
info->snd_seq, OVS_FLOW_CMD_NEW);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ goto unlock;
+ }
+ ovs_unlock();
return genlmsg_reply(reply, info);
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1104,25 +1155,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
int err;
int key_len;
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp)
- return -ENODEV;
-
- if (!a[OVS_FLOW_ATTR_KEY])
- return flush_flows(dp);
+ if (!dp) {
+ err = -ENODEV;
+ goto unlock;
+ }
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ err = flush_flows(dp);
+ goto unlock;
+ }
err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
if (err)
- return err;
+ goto unlock;
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
- if (!flow)
- return -ENOENT;
+ if (!flow) {
+ err = -ENOENT;
+ goto unlock;
+ }
reply = ovs_flow_cmd_alloc_info(flow);
- if (!reply)
- return -ENOMEM;
+ if (!reply) {
+ err = -ENOMEM;
+ goto unlock;
+ }
ovs_flow_tbl_remove(table, flow);
@@ -1131,10 +1190,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
BUG_ON(err < 0);
ovs_flow_deferred_free(flow);
+ ovs_unlock();
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
return 0;
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1143,11 +1205,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
struct flow_table *table;
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp)
+ if (!dp) {
+ ovs_unlock();
return -ENODEV;
+ }
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
for (;;) {
struct sw_flow *flow;
@@ -1168,6 +1233,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0] = bucket;
cb->args[1] = obj;
}
+ ovs_unlock();
return skb->len;
}
@@ -1206,13 +1272,24 @@ static struct genl_family dp_datapath_genl_family = {
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
.maxattr = OVS_DP_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
.name = OVS_DATAPATH_MCGROUP
};
+/*
+ * Size of the genetlink message allocated for a datapath reply: the aligned
+ * ovs_header plus an IFNAMSIZ-byte attribute and a struct ovs_dp_stats
+ * attribute.
+ */
+static size_t ovs_dp_cmd_msg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ovs_header))
+		+ nla_total_size(IFNAMSIZ)
+		+ nla_total_size(sizeof(struct ovs_dp_stats));
+}
+
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
u32 portid, u32 seq, u32 flags, u8 cmd)
{
@@ -1251,7 +1328,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
struct sk_buff *skb;
int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
@@ -1263,7 +1340,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
return skb;
}
-/* Called with genl_mutex and optionally with RTNL lock also. */
+/* Called with ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1297,12 +1374,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
- rtnl_lock();
+ ovs_lock();
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_unlock_rtnl;
+ goto err_unlock_ovs;
ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
@@ -1353,37 +1430,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
list_add_tail(&dp->list_node, &ovs_net->dps);
- rtnl_unlock();
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_datapath_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_unlock();
+
+ ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
return 0;
err_destroy_local_port:
- ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
+ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
err_destroy_ports_array:
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(genl_dereference(dp->table));
+ ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
-err_unlock_rtnl:
- rtnl_unlock();
+err_unlock_ovs:
+ ovs_unlock();
err:
return err;
}
-/* Called with genl_mutex. */
+/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
int i;
- rtnl_lock();
-
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
@@ -1394,14 +1468,11 @@ static void __dp_destroy(struct datapath *dp)
}
list_del(&dp->list_node);
- ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
- /* rtnl_unlock() will wait until all the references to devices that
- * are pending unregistration have been dropped. We do it here to
- * ensure that any internal devices (which contain DP pointers) are
- * fully destroyed before freeing the datapath.
+ /* OVSP_LOCAL is the datapath's internal port. We need to make sure that
+ * all ports in the datapath are destroyed before freeing the datapath.
*/
- rtnl_unlock();
+ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
call_rcu(&dp->rcu, destroy_dp_rcu);
}
@@ -1412,24 +1483,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
- return err;
+ goto unlock;
reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_DEL);
err = PTR_ERR(reply);
if (IS_ERR(reply))
- return err;
+ goto unlock;
__dp_destroy(dp);
+ ovs_unlock();
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_datapath_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
return 0;
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1438,9 +1512,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ err = PTR_ERR(dp);
if (IS_ERR(dp))
- return PTR_ERR(dp);
+ goto unlock;
reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_NEW);
@@ -1448,31 +1524,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = PTR_ERR(reply);
netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
ovs_dp_datapath_multicast_group.id, err);
- return 0;
+ err = 0;
+ goto unlock;
}
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_datapath_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_unlock();
+ ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
return 0;
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *reply;
struct datapath *dp;
+ int err;
+ ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
- if (IS_ERR(dp))
- return PTR_ERR(dp);
+ if (IS_ERR(dp)) {
+ err = PTR_ERR(dp);
+ goto unlock;
+ }
reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_NEW);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ goto unlock;
+ }
+ ovs_unlock();
return genlmsg_reply(reply, info);
+
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1482,6 +1572,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int skip = cb->args[0];
int i = 0;
+ ovs_lock();
list_for_each_entry(dp, &ovs_net->dps, list_node) {
if (i >= skip &&
ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1490,6 +1581,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
break;
i++;
}
+ ovs_unlock();
cb->args[0] = i;
@@ -1535,14 +1627,15 @@ static struct genl_family dp_vport_genl_family = {
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
.maxattr = OVS_VPORT_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
struct genl_multicast_group ovs_dp_vport_multicast_group = {
.name = OVS_VPORT_MCGROUP
};
-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
u32 portid, u32 seq, u32 flags, u8 cmd)
{
@@ -1581,7 +1674,7 @@ error:
return err;
}
-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
u32 seq, u8 cmd)
{
@@ -1598,7 +1691,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
return skb;
}
-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
struct ovs_header *ovs_header,
struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1624,9 +1717,9 @@ static struct vport *lookup_vport(struct net *net,
if (!dp)
return ERR_PTR(-ENODEV);
- vport = ovs_vport_rtnl_rcu(dp, port_no);
+ vport = ovs_vport_ovsl_rcu(dp, port_no);
if (!vport)
- return ERR_PTR(-ENOENT);
+ return ERR_PTR(-ENODEV);
return vport;
} else
return ERR_PTR(-EINVAL);
@@ -1648,7 +1741,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
!a[OVS_VPORT_ATTR_UPCALL_PID])
goto exit;
- rtnl_lock();
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
@@ -1661,7 +1754,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (port_no >= DP_MAX_PORTS)
goto exit_unlock;
- vport = ovs_vport_rtnl_rcu(dp, port_no);
+ vport = ovs_vport_ovsl(dp, port_no);
err = -EBUSY;
if (vport)
goto exit_unlock;
@@ -1671,7 +1764,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -EFBIG;
goto exit_unlock;
}
- vport = ovs_vport_rtnl(dp, port_no);
+ vport = ovs_vport_ovsl(dp, port_no);
if (!vport)
break;
}
@@ -1697,11 +1790,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_detach_port(vport);
goto exit_unlock;
}
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+ ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
exit_unlock:
- rtnl_unlock();
+ ovs_unlock();
exit:
return err;
}
@@ -1713,7 +1806,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
- rtnl_lock();
+ ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
@@ -1742,8 +1835,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
BUG_ON(err < 0);
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ ovs_unlock();
+ ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+ return 0;
rtnl_unlock();
return 0;
@@ -1751,7 +1845,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
exit_free:
kfree_skb(reply);
exit_unlock:
- rtnl_unlock();
+ ovs_unlock();
return err;
}
@@ -1762,7 +1856,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
- rtnl_lock();
+ ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
@@ -1782,11 +1876,10 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = 0;
ovs_dp_detach_port(vport);
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
exit_unlock:
- rtnl_unlock();
+ ovs_unlock();
return err;
}
@@ -1946,13 +2039,13 @@ static void rehash_flow_table(struct work_struct *work)
struct datapath *dp;
struct net *net;
- genl_lock();
+ ovs_lock();
rtnl_lock();
for_each_net(net) {
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
list_for_each_entry(dp, &ovs_net->dps, list_node) {
- struct flow_table *old_table = genl_dereference(dp->table);
+ struct flow_table *old_table = ovsl_dereference(dp->table);
struct flow_table *new_table;
new_table = ovs_flow_tbl_rehash(old_table);
@@ -1963,8 +2056,7 @@ static void rehash_flow_table(struct work_struct *work)
}
}
rtnl_unlock();
- genl_unlock();
-
+ ovs_unlock();
schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
}
@@ -1973,18 +2065,21 @@ static int __net_init ovs_init_net(struct net *net)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
INIT_LIST_HEAD(&ovs_net->dps);
+ INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
return 0;
}
static void __net_exit ovs_exit_net(struct net *net)
{
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
struct datapath *dp, *dp_next;
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- genl_lock();
+ ovs_lock();
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
- genl_unlock();
+ ovs_unlock();
+
+ cancel_work_sync(&ovs_net->dp_notify_work);
}
static struct pernet_operations ovs_net_ops = {
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 031dfbf37c93..16b840695216 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,10 +57,9 @@ struct dp_stats_percpu {
* struct datapath - datapath for flow-based packet switching
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
- * @n_flows: Number of flows currently in flow table.
- * @table: Current flow table. Protected by genl_lock and RCU.
+ * @table: Current flow table. Protected by ovs_mutex and RCU.
* @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
- * RTNL and RCU.
+ * ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace.
*
@@ -86,26 +85,6 @@ struct datapath {
#endif
};
-struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
-
-static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
- return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
-{
- WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
- return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
-{
- ASSERT_RTNL();
- return ovs_lookup_vport(dp, port_no);
-}
-
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -119,7 +98,7 @@ struct ovs_skb_cb {
* struct dp_upcall - metadata to include with a packet to send to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
* @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
- * @userdata: If nonnull, its u64 value is extracted and passed to userspace as
+ * @userdata: If nonnull, its variable-length value is passed to userspace as
* %OVS_PACKET_ATTR_USERDATA.
* @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
* packet is sent and the packet is accounted in the datapath's @n_lost
@@ -132,6 +111,30 @@ struct dp_upcall_info {
u32 portid;
};
+/**
+ * struct ovs_net - Per net-namespace data for ovs.
+ * @dps: List of datapaths to enable dumping them all out.
+ * Protected by ovs_mutex.
+ */
+struct ovs_net {
+ struct list_head dps;
+ struct work_struct dp_notify_work;
+};
+
+extern int ovs_net_id;
+void ovs_lock(void);
+void ovs_unlock(void);
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void);
+#else
+#define lockdep_ovsl_is_held() 1
+#endif
+
+#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
+#define ovsl_dereference(p) \
+ rcu_dereference_protected(p, lockdep_ovsl_is_held())
+
static inline struct net *ovs_dp_get_net(struct datapath *dp)
{
return read_pnet(&dp->net);
@@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
write_pnet(&dp->net, net);
}
+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
+
+static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
+{
+ ASSERT_OVSL();
+ return ovs_lookup_vport(dp, port_no);
+}
+
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_multicast_group ovs_dp_vport_multicast_group;
@@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+void ovs_dp_notify_wq(struct work_struct *work);
#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350e0d33..ef4feec6cd84 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -18,46 +18,78 @@
#include <linux/netdevice.h>
#include <net/genetlink.h>
+#include <net/netns/generic.h>
#include "datapath.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
+static void dp_detach_port_notify(struct vport *vport)
+{
+ struct sk_buff *notify;
+ struct datapath *dp;
+
+ dp = vport->dp;
+ notify = ovs_vport_cmd_build_info(vport, 0, 0,
+ OVS_VPORT_CMD_DEL);
+ ovs_dp_detach_port(vport);
+ if (IS_ERR(notify)) {
+ netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
+ ovs_dp_vport_multicast_group.id,
+ PTR_ERR(notify));
+ return;
+ }
+
+ genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
+ ovs_dp_vport_multicast_group.id,
+ GFP_KERNEL);
+}
+
+void ovs_dp_notify_wq(struct work_struct *work)
+{
+ struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
+ struct datapath *dp;
+
+ ovs_lock();
+ list_for_each_entry(dp, &ovs_net->dps, list_node) {
+ int i;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+ struct vport *vport;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
+ struct netdev_vport *netdev_vport;
+
+ if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+ continue;
+
+ netdev_vport = netdev_vport_priv(vport);
+ if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
+ netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
+ dp_detach_port_notify(vport);
+ }
+ }
+ }
+ ovs_unlock();
+}
+
static int dp_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
+ struct ovs_net *ovs_net;
struct net_device *dev = ptr;
- struct vport *vport;
+ struct vport *vport = NULL;
- if (ovs_is_internal_dev(dev))
- vport = ovs_internal_dev_get_vport(dev);
- else
+ if (!ovs_is_internal_dev(dev))
vport = ovs_netdev_get_vport(dev);
if (!vport)
return NOTIFY_DONE;
- switch (event) {
- case NETDEV_UNREGISTER:
- if (!ovs_is_internal_dev(dev)) {
- struct sk_buff *notify;
- struct datapath *dp = vport->dp;
-
- notify = ovs_vport_cmd_build_info(vport, 0, 0,
- OVS_VPORT_CMD_DEL);
- ovs_dp_detach_port(vport);
- if (IS_ERR(notify)) {
- netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
- ovs_dp_vport_multicast_group.id,
- PTR_ERR(notify));
- break;
- }
-
- genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
- ovs_dp_vport_multicast_group.id,
- GFP_KERNEL);
- }
- break;
+ if (event == NETDEV_UNREGISTER) {
+ ovs_net = net_generic(dev_net(dev), ovs_net_id);
+ queue_work(system_wq, &ovs_net->dp_notify_work);
}
return NOTIFY_DONE;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 67a2b783fe70..b15321a2228c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
return ERR_PTR(-ENOMEM);
sfa->actions_len = actions_len;
- memcpy(sfa->actions, nla_data(actions), actions_len);
+ nla_memcpy(sfa->actions, actions, actions_len);
return sfa;
}
@@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
proto = *(__be16 *) skb->data;
__skb_pull(skb, sizeof(__be16));
- if (ntohs(proto) >= 1536)
+ if (ntohs(proto) >= ETH_P_802_3_MIN)
return proto;
if (skb->len < sizeof(struct llc_snap_hdr))
@@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
__skb_pull(skb, sizeof(struct llc_snap_hdr));
- if (ntohs(llc->ethertype) >= 1536)
+ if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
return llc->ethertype;
return htons(ETH_P_802_2);
@@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
- if (ntohs(swkey->eth.type) < 1536)
+ if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
return -EINVAL;
attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
} else {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a7bb60ff3b5b..0875fde65b9c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
void ovs_flow_used(struct sw_flow *, struct sk_buff *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
-/* Upper bound on the length of a nlattr-formatted flow key. The longest
- * nlattr-formatted flow key would be:
- *
- * struct pad nl hdr total
- * ------ --- ------ -----
- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
- * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
- * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
- * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
- * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
- * OVS_KEY_ATTR_8021Q 4 -- 4 8
- * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation)
- * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype)
- * OVS_KEY_ATTR_IPV6 40 -- 4 44
- * OVS_KEY_ATTR_ICMPV6 2 2 4 8
- * OVS_KEY_ATTR_ND 28 -- 4 32
- * -------------------------------------------------
- * total 152
- */
-#define FLOW_BUFSIZE 152
-
int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *);
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 0531de6c7a4a..84e0a0379186 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -63,16 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde
return stats;
}
-static int internal_dev_mac_addr(struct net_device *dev, void *p)
-{
- struct sockaddr *addr = p;
-
- if (!is_valid_ether_addr(addr->sa_data))
- return -EADDRNOTAVAIL;
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
- return 0;
-}
-
/* Called with rcu_read_lock_bh. */
static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
@@ -126,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
- .ndo_set_mac_address = internal_dev_mac_addr,
+ .ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_dev_get_stats,
};
@@ -138,6 +128,7 @@ static void do_setup(struct net_device *netdev)
netdev->netdev_ops = &internal_dev_netdev_ops;
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netdev->destructor = internal_dev_destructor;
SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
netdev->tx_queue_len = 0;
@@ -146,7 +137,7 @@ static void do_setup(struct net_device *netdev)
NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
netdev->vlan_features = netdev->features;
- netdev->features |= NETIF_F_HW_VLAN_TX;
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
eth_hw_addr_random(netdev);
}
@@ -182,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
if (vport->port_no == OVSP_LOCAL)
netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ rtnl_lock();
err = register_netdevice(netdev_vport->dev);
if (err)
goto error_free_netdev;
dev_set_promiscuity(netdev_vport->dev, 1);
+ rtnl_unlock();
netif_start_queue(netdev_vport->dev);
return vport;
error_free_netdev:
+ rtnl_unlock();
free_netdev(netdev_vport->dev);
error_free_vport:
ovs_vport_free(vport);
@@ -204,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
netif_stop_queue(netdev_vport->dev);
+ rtnl_lock();
dev_set_promiscuity(netdev_vport->dev, -1);
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(netdev_vport->dev);
+
+ rtnl_unlock();
}
static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
@@ -235,7 +232,6 @@ const struct vport_ops ovs_internal_vport_ops = {
.create = internal_dev_create,
.destroy = internal_dev_destroy,
.get_name = ovs_netdev_get_name,
- .get_ifindex = ovs_netdev_get_ifindex,
.send = internal_dev_recv,
};
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61c384a..4f01c6d2ffa4 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)
goto error_put;
}
+ rtnl_lock();
err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
vport);
if (err)
- goto error_put;
+ goto error_unlock;
dev_set_promiscuity(netdev_vport->dev, 1);
netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+ rtnl_unlock();
return vport;
+error_unlock:
+ rtnl_unlock();
error_put:
dev_put(netdev_vport->dev);
error_free_vport:
@@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ rtnl_lock();
netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(netdev_vport->dev);
dev_set_promiscuity(netdev_vport->dev, -1);
+ rtnl_unlock();
call_rcu(&netdev_vport->rcu, free_port_rcu);
}
@@ -144,12 +150,6 @@ const char *ovs_netdev_get_name(const struct vport *vport)
return netdev_vport->dev->name;
}
-int ovs_netdev_get_ifindex(const struct vport *vport)
-{
- const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
- return netdev_vport->dev->ifindex;
-}
-
static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
@@ -200,6 +200,5 @@ const struct vport_ops ovs_netdev_vport_ops = {
.create = netdev_create,
.destroy = netdev_destroy,
.get_name = ovs_netdev_get_name,
- .get_ifindex = ovs_netdev_get_ifindex,
.send = netdev_send,
};
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 6478079b3417..a3cb3a32cd77 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -40,6 +40,5 @@ netdev_vport_priv(const struct vport *vport)
const char *ovs_netdev_get_name(const struct vport *);
const char *ovs_netdev_get_config(const struct vport *);
-int ovs_netdev_get_ifindex(const struct vport *);
#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f6b8132ce4cb..720623190eaa 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {
&ovs_internal_vport_ops,
};
-/* Protected by RCU read lock for reading, RTNL lock for writing. */
+/* Protected by RCU read lock for reading, ovs_mutex for writing. */
static struct hlist_head *dev_table;
#define VPORT_HASH_BUCKETS 1024
@@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
*
* @name: name of port to find
*
- * Must be called with RTNL or RCU read lock.
+ * Must be called with ovs_mutex or RCU read lock.
*/
struct vport *ovs_vport_locate(struct net *net, const char *name)
{
@@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
- vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
+ vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
if (!vport->percpu_stats) {
kfree(vport);
return ERR_PTR(-ENOMEM);
@@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)
* @parms: Information about new vport.
*
* Creates a new vport with the specified configuration (which is dependent on
- * device type). RTNL lock must be held.
+ * device type). ovs_mutex must be held.
*/
struct vport *ovs_vport_add(const struct vport_parms *parms)
{
@@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
int err = 0;
int i;
- ASSERT_RTNL();
-
for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
if (vport_ops_list[i]->type == parms->type) {
struct hlist_head *bucket;
@@ -201,12 +199,10 @@ out:
* @port: New configuration.
*
* Modifies an existing device with the specified configuration (which is
- * dependent on device type). RTNL lock must be held.
+ * dependent on device type). ovs_mutex must be held.
*/
int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
{
- ASSERT_RTNL();
-
if (!vport->ops->set_options)
return -EOPNOTSUPP;
return vport->ops->set_options(vport, options);
@@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
* @vport: vport to delete.
*
* Detaches @vport from its datapath and destroys it. It is possible to fail
- * for reasons such as lack of memory. RTNL lock must be held.
+ * for reasons such as lack of memory. ovs_mutex must be held.
*/
void ovs_vport_del(struct vport *vport)
{
- ASSERT_RTNL();
+ ASSERT_OVSL();
hlist_del_rcu(&vport->hash_node);
@@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)
*
* Retrieves transmit, receive, and error stats for the given device.
*
- * Must be called with RTNL lock or rcu_read_lock.
+ * Must be called with ovs_mutex or rcu_read_lock.
*/
void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
{
@@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
spin_unlock_bh(&vport->stats_lock);
for_each_possible_cpu(i) {
- const struct vport_percpu_stats *percpu_stats;
- struct vport_percpu_stats local_stats;
+ const struct pcpu_tstats *percpu_stats;
+ struct pcpu_tstats local_stats;
unsigned int start;
percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
do {
- start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+ start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+ } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
stats->rx_bytes += local_stats.rx_bytes;
stats->rx_packets += local_stats.rx_packets;
@@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
* negative error code if a real error occurred. If an error occurs, @skb is
* left unmodified.
*
- * Must be called with RTNL lock or rcu_read_lock.
+ * Must be called with ovs_mutex or rcu_read_lock.
*/
int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
{
struct nlattr *nla;
+ int err;
+
+ if (!vport->ops->get_options)
+ return 0;
nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
if (!nla)
return -EMSGSIZE;
- if (vport->ops->get_options) {
- int err = vport->ops->get_options(vport, skb);
- if (err) {
- nla_nest_cancel(skb, nla);
- return err;
- }
+ err = vport->ops->get_options(vport, skb);
+ if (err) {
+ nla_nest_cancel(skb, nla);
+ return err;
}
nla_nest_end(skb, nla);
@@ -329,13 +327,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
{
- struct vport_percpu_stats *stats;
+ struct pcpu_tstats *stats;
stats = this_cpu_ptr(vport->percpu_stats);
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
stats->rx_bytes += skb->len;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
ovs_dp_process_received_packet(vport, skb);
}
@@ -346,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
* @vport: vport on which to send the packet
* @skb: skb to send
*
- * Sends the given packet and returns the length of data sent. Either RTNL
+ * Sends the given packet and returns the length of data sent. Either ovs
* lock or rcu_read_lock must be held.
*/
int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
@@ -354,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
int sent = vport->ops->send(vport, skb);
if (likely(sent)) {
- struct vport_percpu_stats *stats;
+ struct pcpu_tstats *stats;
stats = this_cpu_ptr(vport->percpu_stats);
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
stats->tx_bytes += sent;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
}
return sent;
}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 3f7961ea3c56..68a377bc0841 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -19,6 +19,7 @@
#ifndef VPORT_H
#define VPORT_H 1
+#include <linux/if_tunnel.h>
#include <linux/list.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
@@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *);
/* The following definitions are for implementers of vport devices: */
-struct vport_percpu_stats {
- u64 rx_bytes;
- u64 rx_packets;
- u64 tx_bytes;
- u64 tx_packets;
- struct u64_stats_sync sync;
-};
-
struct vport_err_stats {
u64 rx_dropped;
u64 rx_errors;
@@ -68,10 +61,10 @@ struct vport_err_stats {
/**
* struct vport - one port within a datapath
* @rcu: RCU callback head for deferred destruction.
- * @port_no: Index into @dp's @ports array.
* @dp: Datapath to which this port belongs.
* @upcall_portid: The Netlink port to use for packets received on this port that
* miss the flow table.
+ * @port_no: Index into @dp's @ports array.
* @hash_node: Element in @dev_table hash table in vport.c.
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure.
@@ -81,15 +74,15 @@ struct vport_err_stats {
*/
struct vport {
struct rcu_head rcu;
- u16 port_no;
struct datapath *dp;
u32 upcall_portid;
+ u16 port_no;
struct hlist_node hash_node;
struct hlist_node dp_hash_node;
const struct vport_ops *ops;
- struct vport_percpu_stats __percpu *percpu_stats;
+ struct pcpu_tstats __percpu *percpu_stats;
spinlock_t stats_lock;
struct vport_err_stats err_stats;
@@ -131,24 +124,22 @@ struct vport_parms {
* have any configuration.
* @get_name: Get the device's name.
* @get_config: Get the device's configuration.
- * @get_ifindex: Get the system interface index associated with the device.
* May be null if the device does not have an ifindex.
* @send: Send a packet on the device. Returns the length of the packet sent.
*/
struct vport_ops {
enum ovs_vport_type type;
- /* Called with RTNL lock. */
+ /* Called with ovs_mutex. */
struct vport *(*create)(const struct vport_parms *);
void (*destroy)(struct vport *);
int (*set_options)(struct vport *, struct nlattr *);
int (*get_options)(const struct vport *, struct sk_buff *);
- /* Called with rcu_read_lock or RTNL lock. */
+ /* Called with rcu_read_lock or ovs_mutex. */
const char *(*get_name)(const struct vport *);
void (*get_config)(const struct vport *, void *);
- int (*get_ifindex)(const struct vport *);
int (*send)(struct vport *, struct sk_buff *);
};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1d6793dbfbae..20a1bd0e6549 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -158,10 +158,16 @@ struct packet_mreq_max {
unsigned char mr_address[MAX_ADDR_LEN];
};
+union tpacket_uhdr {
+ struct tpacket_hdr *h1;
+ struct tpacket2_hdr *h2;
+ struct tpacket3_hdr *h3;
+ void *raw;
+};
+
static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
int closing, int tx_ring);
-
#define V3_ALIGNMENT (8)
#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
@@ -181,6 +187,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
+static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
static void *packet_previous_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
@@ -288,11 +296,7 @@ static inline __pure struct page *pgv_to_page(void *addr)
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } h;
+ union tpacket_uhdr h;
h.raw = frame;
switch (po->tp_version) {
@@ -315,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
static int __packet_get_status(struct packet_sock *po, void *frame)
{
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } h;
+ union tpacket_uhdr h;
smp_rmb();
@@ -339,17 +339,66 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
}
}
+static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
+ unsigned int flags)
+{
+ struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+
+ if (shhwtstamps) {
+ if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
+ ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
+ return TP_STATUS_TS_SYS_HARDWARE;
+ if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
+ return TP_STATUS_TS_RAW_HARDWARE;
+ }
+
+ if (ktime_to_timespec_cond(skb->tstamp, ts))
+ return TP_STATUS_TS_SOFTWARE;
+
+ return 0;
+}
+
+static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
+ struct sk_buff *skb)
+{
+ union tpacket_uhdr h;
+ struct timespec ts;
+ __u32 ts_status;
+
+ if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+ return 0;
+
+ h.raw = frame;
+ switch (po->tp_version) {
+ case TPACKET_V1:
+ h.h1->tp_sec = ts.tv_sec;
+ h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
+ break;
+ case TPACKET_V2:
+ h.h2->tp_sec = ts.tv_sec;
+ h.h2->tp_nsec = ts.tv_nsec;
+ break;
+ case TPACKET_V3:
+ default:
+ WARN(1, "TPACKET version not supported.\n");
+ BUG();
+ }
+
+ /* one flush is safe, as both fields always lie on the same cacheline */
+ flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
+ smp_wmb();
+
+ return ts_status;
+}
+
static void *packet_lookup_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
unsigned int position,
int status)
{
unsigned int pg_vec_pos, frame_offset;
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } h;
+ union tpacket_uhdr h;
pg_vec_pos = position / rb->frames_per_block;
frame_offset = position % rb->frames_per_block;
@@ -479,7 +528,7 @@ static void init_prb_bdqc(struct packet_sock *po,
p1->hdrlen = po->tp_hdrlen;
p1->version = po->tp_version;
p1->last_kactive_blk_num = 0;
- po->stats_u.stats3.tp_freeze_q_cnt = 0;
+ po->stats.stats3.tp_freeze_q_cnt = 0;
if (req_u->req3.tp_retire_blk_tov)
p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
else
@@ -647,7 +696,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
struct tpacket3_hdr *last_pkt;
struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
- if (po->stats.tp_drops)
+ if (po->stats.stats3.tp_drops)
status |= TP_STATUS_LOSING;
last_pkt = (struct tpacket3_hdr *)pkc1->prev;
@@ -693,36 +742,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1,
smp_rmb();
- if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {
+ /* We could have just memset this but we will lose the
+ * flexibility of making the priv area sticky
+ */
- /* We could have just memset this but we will lose the
- * flexibility of making the priv area sticky
- */
- BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
- BLOCK_NUM_PKTS(pbd1) = 0;
- BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- getnstimeofday(&ts);
- h1->ts_first_pkt.ts_sec = ts.tv_sec;
- h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
- pkc1->pkblk_start = (char *)pbd1;
- pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
- pbd1->version = pkc1->version;
- pkc1->prev = pkc1->nxt_offset;
- pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
- prb_thaw_queue(pkc1);
- _prb_refresh_rx_retire_blk_timer(pkc1);
+ BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
+ BLOCK_NUM_PKTS(pbd1) = 0;
+ BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- smp_wmb();
+ getnstimeofday(&ts);
- return;
- }
+ h1->ts_first_pkt.ts_sec = ts.tv_sec;
+ h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
+
+ pkc1->pkblk_start = (char *)pbd1;
+ pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+
+ BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+ BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
+
+ pbd1->version = pkc1->version;
+ pkc1->prev = pkc1->nxt_offset;
+ pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
+
+ prb_thaw_queue(pkc1);
+ _prb_refresh_rx_retire_blk_timer(pkc1);
- WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
- pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
- dump_stack();
- BUG();
+ smp_wmb();
}
/*
@@ -752,7 +798,7 @@ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
struct packet_sock *po)
{
pkc->reset_pending_on_curr_blk = 1;
- po->stats_u.stats3.tp_freeze_q_cnt++;
+ po->stats.stats3.tp_freeze_q_cnt++;
}
#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
@@ -813,10 +859,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
prb_close_block(pkc, pbd, po, status);
return;
}
-
- WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
- dump_stack();
- BUG();
}
static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
@@ -973,11 +1015,11 @@ static void *packet_current_rx_frame(struct packet_sock *po,
static void *prb_lookup_block(struct packet_sock *po,
struct packet_ring_buffer *rb,
- unsigned int previous,
+ unsigned int idx,
int status)
{
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
- struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
+ struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
if (status != BLOCK_STATUS(pbd))
return NULL;
@@ -1041,6 +1083,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff)
buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}
+static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+{
+ struct sock *sk = &po->sk;
+ bool has_room;
+
+ if (po->prot_hook.func != tpacket_rcv)
+ return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
+ <= sk->sk_rcvbuf;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+ if (po->tp_version == TPACKET_V3)
+ has_room = prb_lookup_block(po, &po->rx_ring,
+ po->rx_ring.prb_bdqc.kactive_blk_num,
+ TP_STATUS_KERNEL);
+ else
+ has_room = packet_lookup_frame(po, &po->rx_ring,
+ po->rx_ring.head,
+ TP_STATUS_KERNEL);
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ return has_room;
+}
+
static void packet_sock_destruct(struct sock *sk)
{
skb_queue_purge(&sk->sk_error_queue);
@@ -1066,16 +1131,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
return x;
}
-static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+static unsigned int fanout_demux_hash(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
{
- u32 idx, hash = skb->rxhash;
-
- idx = ((u64)hash * num) >> 32;
-
- return f->arr[idx];
+ return (((u64)skb->rxhash) * num) >> 32;
}
-static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+static unsigned int fanout_demux_lb(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
{
int cur, old;
@@ -1083,14 +1148,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb
while ((old = atomic_cmpxchg(&f->rr_cur, cur,
fanout_rr_next(f, num))) != cur)
cur = old;
- return f->arr[cur];
+ return cur;
}
-static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+static unsigned int fanout_demux_cpu(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
{
- unsigned int cpu = smp_processor_id();
+ return smp_processor_id() % num;
+}
+
+static unsigned int fanout_demux_rollover(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int idx, unsigned int skip,
+ unsigned int num)
+{
+ unsigned int i, j;
+
+ i = j = min_t(int, f->next[idx], num - 1);
+ do {
+ if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
+ if (i != j)
+ f->next[idx] = i;
+ return i;
+ }
+ if (++i == num)
+ i = 0;
+ } while (i != j);
- return f->arr[cpu % num];
+ return idx;
+}
+
+static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
+{
+ return f->flags & (flag >> 8);
}
static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
@@ -1099,7 +1190,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
struct packet_fanout *f = pt->af_packet_priv;
unsigned int num = f->num_members;
struct packet_sock *po;
- struct sock *sk;
+ unsigned int idx;
if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
!num) {
@@ -1110,23 +1201,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
switch (f->type) {
case PACKET_FANOUT_HASH:
default:
- if (f->defrag) {
+ if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
if (!skb)
return 0;
}
skb_get_rxhash(skb);
- sk = fanout_demux_hash(f, skb, num);
+ idx = fanout_demux_hash(f, skb, num);
break;
case PACKET_FANOUT_LB:
- sk = fanout_demux_lb(f, skb, num);
+ idx = fanout_demux_lb(f, skb, num);
break;
case PACKET_FANOUT_CPU:
- sk = fanout_demux_cpu(f, skb, num);
+ idx = fanout_demux_cpu(f, skb, num);
+ break;
+ case PACKET_FANOUT_ROLLOVER:
+ idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
break;
}
- po = pkt_sk(sk);
+ po = pkt_sk(f->arr[idx]);
+ if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
+ unlikely(!packet_rcv_has_room(po, skb))) {
+ idx = fanout_demux_rollover(f, skb, idx, idx, num);
+ po = pkt_sk(f->arr[idx]);
+ }
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
@@ -1175,10 +1274,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f, *match;
u8 type = type_flags & 0xff;
- u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
+ u8 flags = type_flags >> 8;
int err;
switch (type) {
+ case PACKET_FANOUT_ROLLOVER:
+ if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
+ return -EINVAL;
case PACKET_FANOUT_HASH:
case PACKET_FANOUT_LB:
case PACKET_FANOUT_CPU:
@@ -1203,7 +1305,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
}
}
err = -EINVAL;
- if (match && match->defrag != defrag)
+ if (match && match->flags != flags)
goto out;
if (!match) {
err = -ENOMEM;
@@ -1213,7 +1315,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
write_pnet(&match->net, sock_net(sk));
match->id = id;
match->type = type;
- match->defrag = defrag;
+ match->flags = flags;
atomic_set(&match->rr_cur, 0);
INIT_LIST_HEAD(&match->list);
spin_lock_init(&match->lock);
@@ -1443,13 +1545,14 @@ retry:
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (err < 0)
- goto out_unlock;
+
+ sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
+ skb_probe_transport_header(skb, 0);
+
dev_queue_xmit(skb);
rcu_read_unlock();
return len;
@@ -1577,7 +1680,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
nf_reset(skb);
spin_lock(&sk->sk_receive_queue.lock);
- po->stats.tp_packets++;
+ po->stats.stats1.tp_packets++;
skb->dropcount = atomic_read(&sk->sk_drops);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
@@ -1586,7 +1689,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
drop_n_acct:
spin_lock(&sk->sk_receive_queue.lock);
- po->stats.tp_drops++;
+ po->stats.stats1.tp_drops++;
atomic_inc(&sk->sk_drops);
spin_unlock(&sk->sk_receive_queue.lock);
@@ -1606,21 +1709,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct sock *sk;
struct packet_sock *po;
struct sockaddr_ll *sll;
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- struct tpacket3_hdr *h3;
- void *raw;
- } h;
+ union tpacket_uhdr h;
u8 *skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
unsigned long status = TP_STATUS_USER;
unsigned short macoff, netoff, hdrlen;
struct sk_buff *copy_skb = NULL;
- struct timeval tv;
struct timespec ts;
- struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+ __u32 ts_status;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@@ -1692,10 +1789,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
* Anyways, moving it for V1/V2 only as V3 doesn't need this
* at packet level.
*/
- if (po->stats.tp_drops)
+ if (po->stats.stats1.tp_drops)
status |= TP_STATUS_LOSING;
}
- po->stats.tp_packets++;
+ po->stats.stats1.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
@@ -1704,24 +1801,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
+ if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+ getnstimeofday(&ts);
+
+ status |= ts_status;
+
switch (po->tp_version) {
case TPACKET_V1:
h.h1->tp_len = skb->len;
h.h1->tp_snaplen = snaplen;
h.h1->tp_mac = macoff;
h.h1->tp_net = netoff;
- if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
- && shhwtstamps->syststamp.tv64)
- tv = ktime_to_timeval(shhwtstamps->syststamp);
- else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
- && shhwtstamps->hwtstamp.tv64)
- tv = ktime_to_timeval(shhwtstamps->hwtstamp);
- else if (skb->tstamp.tv64)
- tv = ktime_to_timeval(skb->tstamp);
- else
- do_gettimeofday(&tv);
- h.h1->tp_sec = tv.tv_sec;
- h.h1->tp_usec = tv.tv_usec;
+ h.h1->tp_sec = ts.tv_sec;
+ h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
hdrlen = sizeof(*h.h1);
break;
case TPACKET_V2:
@@ -1729,16 +1821,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.h2->tp_snaplen = snaplen;
h.h2->tp_mac = macoff;
h.h2->tp_net = netoff;
- if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
- && shhwtstamps->syststamp.tv64)
- ts = ktime_to_timespec(shhwtstamps->syststamp);
- else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
- && shhwtstamps->hwtstamp.tv64)
- ts = ktime_to_timespec(shhwtstamps->hwtstamp);
- else if (skb->tstamp.tv64)
- ts = ktime_to_timespec(skb->tstamp);
- else
- getnstimeofday(&ts);
h.h2->tp_sec = ts.tv_sec;
h.h2->tp_nsec = ts.tv_nsec;
if (vlan_tx_tag_present(skb)) {
@@ -1759,16 +1841,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.h3->tp_snaplen = snaplen;
h.h3->tp_mac = macoff;
h.h3->tp_net = netoff;
- if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
- && shhwtstamps->syststamp.tv64)
- ts = ktime_to_timespec(shhwtstamps->syststamp);
- else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
- && shhwtstamps->hwtstamp.tv64)
- ts = ktime_to_timespec(shhwtstamps->hwtstamp);
- else if (skb->tstamp.tv64)
- ts = ktime_to_timespec(skb->tstamp);
- else
- getnstimeofday(&ts);
h.h3->tp_sec = ts.tv_sec;
h.h3->tp_nsec = ts.tv_nsec;
hdrlen = sizeof(*h.h3);
@@ -1819,7 +1891,7 @@ drop:
return 0;
ring_is_full:
- po->stats.tp_drops++;
+ po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk, 0);
@@ -1833,10 +1905,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
void *ph;
if (likely(po->tx_ring.pg_vec)) {
+ __u32 ts;
+
ph = skb_shinfo(skb)->destructor_arg;
BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
atomic_dec(&po->tx_ring.pending);
- __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
+
+ ts = __packet_set_timestamp(po, ph, skb);
+ __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
}
sock_wfree(skb);
@@ -1846,11 +1922,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, int size_max,
__be16 proto, unsigned char *addr, int hlen)
{
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } ph;
+ union tpacket_uhdr ph;
int to_write, offset, len, tp_len, nr_frags, len_max;
struct socket *sock = po->sk.sk_socket;
struct page *page;
@@ -1863,6 +1935,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->dev = dev;
skb->priority = po->sk.sk_priority;
skb->mark = po->sk.sk_mark;
+ sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
skb_shinfo(skb)->destructor_arg = ph.raw;
switch (po->tp_version) {
@@ -1880,6 +1953,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
+ skb_probe_transport_header(skb, 0);
if (po->tp_tx_has_off) {
int off_min, off_max, off;
@@ -2247,9 +2321,8 @@ static int packet_snd(struct socket *sock,
err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
if (err)
goto out_free;
- err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (err < 0)
- goto out_free;
+
+ sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
/* Earlier code assumed this would be a VLAN pkt,
@@ -2289,6 +2362,8 @@ static int packet_snd(struct socket *sock,
len += vnet_hdr_len;
}
+ skb_probe_transport_header(skb, reserve);
+
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -2776,12 +2851,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
return -EOPNOTSUPP;
uaddr->sa_family = AF_PACKET;
+ memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
if (dev)
- strncpy(uaddr->sa_data, dev->name, 14);
- else
- memset(uaddr->sa_data, 0, 14);
+ strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
*uaddr_len = sizeof(*uaddr);
@@ -3165,8 +3239,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
void *data = &val;
- struct tpacket_stats st;
- union tpacket_stats_u st_u;
+ union tpacket_stats_u st;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3180,22 +3253,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case PACKET_STATISTICS:
spin_lock_bh(&sk->sk_receive_queue.lock);
+ memcpy(&st, &po->stats, sizeof(st));
+ memset(&po->stats, 0, sizeof(po->stats));
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
if (po->tp_version == TPACKET_V3) {
lv = sizeof(struct tpacket_stats_v3);
- memcpy(&st_u.stats3, &po->stats,
- sizeof(struct tpacket_stats));
- st_u.stats3.tp_freeze_q_cnt =
- po->stats_u.stats3.tp_freeze_q_cnt;
- st_u.stats3.tp_packets += po->stats.tp_drops;
- data = &st_u.stats3;
+ data = &st.stats3;
} else {
lv = sizeof(struct tpacket_stats);
- st = po->stats;
- st.tp_packets += st.tp_drops;
- data = &st;
+ data = &st.stats1;
}
- memset(&po->stats, 0, sizeof(st));
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+
break;
case PACKET_AUXDATA:
val = po->auxdata;
@@ -3240,7 +3309,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
case PACKET_FANOUT:
val = (po->fanout ?
((u32)po->fanout->id |
- ((u32)po->fanout->type << 16)) :
+ ((u32)po->fanout->type << 16) |
+ ((u32)po->fanout->flags << 24)) :
0);
break;
case PACKET_TX_HAS_OFF:
diff --git a/net/packet/diag.c b/net/packet/diag.c
index d3fcd1ebef7e..a9584a2f6d69 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -125,8 +125,10 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
return ret;
}
-static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
- u32 portid, u32 seq, u32 flags, int sk_ino)
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ struct packet_diag_req *req,
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags, int sk_ino)
{
struct nlmsghdr *nlh;
struct packet_diag_msg *rp;
@@ -147,6 +149,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
pdiag_put_info(po, skb))
goto out_nlmsg_trim;
+ if ((req->pdiag_show & PACKET_SHOW_INFO) &&
+ nla_put_u32(skb, PACKET_DIAG_UID,
+ from_kuid_munged(user_ns, sock_i_uid(sk))))
+ goto out_nlmsg_trim;
+
if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
pdiag_put_mclist(po, skb))
goto out_nlmsg_trim;
@@ -159,6 +166,14 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
pdiag_put_fanout(po, skb))
goto out_nlmsg_trim;
+ if ((req->pdiag_show & PACKET_SHOW_MEMINFO) &&
+ sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO))
+ goto out_nlmsg_trim;
+
+ if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
+ sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER))
+ goto out_nlmsg_trim;
+
return nlmsg_end(skb, nlh);
out_nlmsg_trim:
@@ -183,9 +198,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (num < s_num)
goto next;
- if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- sock_i_ino(sk)) < 0)
+ if (sk_diag_fill(sk, skb, req,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ sock_i_ino(sk)) < 0)
goto done;
next:
num++;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e84cab8cb7a9..c4e4b4561207 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -54,6 +54,7 @@ struct pgv {
struct packet_ring_buffer {
struct pgv *pg_vec;
+
unsigned int head;
unsigned int frames_per_block;
unsigned int frame_size;
@@ -63,8 +64,9 @@ struct packet_ring_buffer {
unsigned int pg_vec_pages;
unsigned int pg_vec_len;
- struct tpacket_kbdq_core prb_bdqc;
atomic_t pending;
+
+ struct tpacket_kbdq_core prb_bdqc;
};
extern struct mutex fanout_mutex;
@@ -77,10 +79,11 @@ struct packet_fanout {
unsigned int num_members;
u16 id;
u8 type;
- u8 defrag;
+ u8 flags;
atomic_t rr_cur;
struct list_head list;
struct sock *arr[PACKET_FANOUT_MAX];
+ int next[PACKET_FANOUT_MAX];
spinlock_t lock;
atomic_t sk_ref;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
@@ -90,8 +93,7 @@ struct packet_sock {
/* struct sock has to be the first member of packet_sock */
struct sock sk;
struct packet_fanout *fanout;
- struct tpacket_stats stats;
- union tpacket_stats_u stats_u;
+ union tpacket_stats_u stats;
struct packet_ring_buffer rx_ring;
struct packet_ring_buffer tx_ring;
int copy_thresh;
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 0193630d3061..dc15f4300808 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
[IFA_LOCAL] = { .type = NLA_U8 },
};
-static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
+static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFA_MAX+1];
@@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
[RTA_OIF] = { .type = NLA_U32 },
};
-static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
+static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[RTA_MAX+1];
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 9b9be5279f5d..1cec5e4f3a5e 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -587,7 +587,7 @@ static ssize_t rfkill_name_show(struct device *dev,
static const char *rfkill_get_type_str(enum rfkill_type type)
{
- BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1);
+ BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1);
switch (type) {
case RFKILL_TYPE_WLAN:
@@ -604,6 +604,8 @@ static const char *rfkill_get_type_str(enum rfkill_type type)
return "gps";
case RFKILL_TYPE_FM:
return "fm";
+ case RFKILL_TYPE_NFC:
+ return "nfc";
default:
BUG();
}
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 78fc0937948d..fb076cd6f808 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -131,6 +131,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name);
if (IS_ERR(rfkill->pwr_clk)) {
pr_warn("%s: can't find pwr_clk.\n", __func__);
+ ret = PTR_ERR(rfkill->pwr_clk);
goto fail_shutdown_name;
}
}
@@ -152,9 +153,11 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
}
rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type,
- &rfkill_gpio_ops, rfkill);
- if (!rfkill->rfkill_dev)
+ &rfkill_gpio_ops, rfkill);
+ if (!rfkill->rfkill_dev) {
+ ret = -ENOMEM;
goto fail_shutdown;
+ }
ret = rfkill_register(rfkill->rfkill_dev);
if (ret < 0)
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index 4b5ab21ecb24..d11ac79246e4 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -51,7 +51,7 @@ static int rfkill_regulator_set_block(void *data, bool blocked)
return 0;
}
-struct rfkill_ops rfkill_regulator_ops = {
+static struct rfkill_ops rfkill_regulator_ops = { /* now static; only .set_block is populated */
.set_block = rfkill_regulator_set_block,
};
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8579c4bb20c9..fd7072827a40 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -982,7 +982,7 @@ done:
return ret;
}
-static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ACT_MAX + 1];
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 08fa1e8a4ca4..3a4c0caa1f7d 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
return 1;
}
-static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl)
{
struct icmp6hdr *icmp6h;
+ const struct ipv6hdr *ip6h;
icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
if (icmp6h == NULL)
return 0;
+ ip6h = ipv6_hdr(skb);
icmp6h->icmp6_cksum = 0;
skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
return 1;
}
-static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
+static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl)
{
struct tcphdr *tcph;
+ const struct iphdr *iph;
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
+ iph = ip_hdr(skb);
tcph->check = 0;
skb->csum = csum_partial(tcph, ipl - ihl, 0);
tcph->check = tcp_v4_check(ipl - ihl,
@@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
return 1;
}
-static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl)
{
struct tcphdr *tcph;
+ const struct ipv6hdr *ip6h;
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
+ ip6h = ipv6_hdr(skb);
tcph->check = 0;
skb->csum = csum_partial(tcph, ipl - ihl, 0);
tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
return 1;
}
-static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
+static int tcf_csum_ipv4_udp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl, int udplite)
{
struct udphdr *udph;
+ const struct iphdr *iph;
u16 ul;
/*
@@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
if (udph == NULL)
return 0;
+ iph = ip_hdr(skb);
ul = ntohs(udph->len);
if (udplite || udph->check) {
@@ -276,10 +284,11 @@ ignore_obscure_skb:
return 1;
}
-static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+static int tcf_csum_ipv6_udp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl, int udplite)
{
struct udphdr *udph;
+ const struct ipv6hdr *ip6h;
u16 ul;
/*
@@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
if (udph == NULL)
return 0;
+ ip6h = ipv6_hdr(skb);
ul = ntohs(udph->len);
udph->check = 0;
@@ -328,7 +338,7 @@ ignore_obscure_skb:
static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
int ntkoff;
ntkoff = skb_network_offset(skb);
@@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
break;
case IPPROTO_TCP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
- if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
+ if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4,
ntohs(iph->tot_len)))
goto fail;
break;
case IPPROTO_UDP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
- if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
+ if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
ntohs(iph->tot_len), 0))
goto fail;
break;
case IPPROTO_UDPLITE:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
- if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
+ if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
ntohs(iph->tot_len), 1))
goto fail;
break;
@@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto fail;
- ip_send_check(iph);
+ ip_send_check(ip_hdr(skb));
}
return 1;
@@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
ixhl = ipv6_optlen(ip6xh);
if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
goto fail;
+ ip6xh = (void *)(skb_network_header(skb) + hl);
if ((nexthdr == NEXTHDR_HOP) &&
!(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
goto fail;
@@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
break;
case IPPROTO_ICMPV6:
if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
- if (!tcf_csum_ipv6_icmp(skb, ip6h,
+ if (!tcf_csum_ipv6_icmp(skb,
hl, pl + sizeof(*ip6h)))
goto fail;
goto done;
case IPPROTO_TCP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
- if (!tcf_csum_ipv6_tcp(skb, ip6h,
+ if (!tcf_csum_ipv6_tcp(skb,
hl, pl + sizeof(*ip6h)))
goto fail;
goto done;
case IPPROTO_UDP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
- if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
+ if (!tcf_csum_ipv6_udp(skb, hl,
pl + sizeof(*ip6h), 0))
goto fail;
goto done;
case IPPROTO_UDPLITE:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
- if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
+ if (!tcf_csum_ipv6_udp(skb, hl,
pl + sizeof(*ip6h), 1))
goto fail;
goto done;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index e0f6de64afec..60d88b6b9560 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Copyright: Jamal Hadi Salim (2002-4)
+ * Copyright: Jamal Hadi Salim (2002-13)
*/
#include <linux/types.h>
@@ -303,17 +303,44 @@ static struct tc_action_ops act_ipt_ops = {
.walk = tcf_generic_walker
};
-MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+static struct tc_action_ops act_xt_ops = { /* action ops registered under the kind name "xt" */
+ .kind = "xt",
+ .hinfo = &ipt_hash_info,
+ .type = TCA_ACT_IPT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_ipt, /* the act/dump/cleanup/init callbacks are the tcf_ipt* handlers */
+ .dump = tcf_ipt_dump,
+ .cleanup = tcf_ipt_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_ipt_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
MODULE_DESCRIPTION("Iptables target actions");
MODULE_LICENSE("GPL");
+MODULE_ALIAS("act_xt");
static int __init ipt_init_module(void)
{
- return tcf_register_action(&act_ipt_ops);
+ int ret1, ret2; /* results of registering the "xt" and "ipt" kinds respectively */
+ ret1 = tcf_register_action(&act_xt_ops);
+ if (ret1 < 0)
+ pr_err("Failed to load xt action\n"); /* pr_err: a bare printk() carries no log level */
+ ret2 = tcf_register_action(&act_ipt_ops);
+ if (ret2 < 0)
+ pr_err("Failed to load ipt action\n");
+
+ if (ret1 < 0 && ret2 < 0) /* fail module load only when neither kind could be registered */
+ return ret1;
+ else
+ return 0;
}
static void __exit ipt_cleanup_module(void)
{
+ tcf_unregister_action(&act_xt_ops); /* NOTE(review): runs even if registering act_xt_ops failed at init (init returns 0 when only one kind registers) - confirm tcf_unregister_action() tolerates that */
tcf_unregister_action(&act_ipt_ops);
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 823463adbd21..189e3c5b3d09 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -231,14 +231,14 @@ override:
}
if (R_tab) {
police->rate_present = true;
- psched_ratecfg_precompute(&police->rate, R_tab->rate.rate);
+ psched_ratecfg_precompute(&police->rate, &R_tab->rate);
qdisc_put_rtab(R_tab);
} else {
police->rate_present = false;
}
if (P_tab) {
police->peak_present = true;
- psched_ratecfg_precompute(&police->peak, P_tab->rate.rate);
+ psched_ratecfg_precompute(&police->peak, &P_tab->rate);
qdisc_put_rtab(P_tab);
} else {
police->peak_present = false;
@@ -376,9 +376,9 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
};
if (police->rate_present)
- opt.rate.rate = psched_ratecfg_getrate(&police->rate);
+ psched_ratecfg_getrate(&opt.rate, &police->rate);
if (police->peak_present)
- opt.peakrate.rate = psched_ratecfg_getrate(&police->peak);
+ psched_ratecfg_getrate(&opt.peakrate, &police->peak);
if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
goto nla_put_failure;
if (police->tcfp_result &&
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 964f5e4f4b8a..8e118af90973 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -22,7 +22,6 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
-#include <linux/netlink.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
@@ -118,7 +117,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
/* Add/change/delete/get a filter node */
-static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
@@ -141,7 +140,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
return -EPERM;
+
replay:
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
+ if (err < 0)
+ return err;
+
t = nlmsg_data(n);
protocol = TC_H_MIN(t->tcm_info);
prio = TC_H_MAJ(t->tcm_info);
@@ -164,10 +168,6 @@ replay:
if (dev == NULL)
return -ENODEV;
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
- if (err < 0)
- return err;
-
/* Find qdisc */
if (!parent) {
q = dev->qdisc;
@@ -427,7 +427,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
const struct Qdisc_class_ops *cops;
struct tcf_dump_args arg;
- if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return skb->len;
dev = __dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index aa36a8c8b33b..7881e2fccbc2 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
return -EOPNOTSUPP;
if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
- sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns)
+ sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
return -EOPNOTSUPP;
}
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 3130320997e2..938b7cbf5627 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -83,7 +83,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
opt.dim = set->dim;
opt.flags = set->flags;
opt.cmdflags = 0;
- opt.timeout = ~0u;
+ opt.ext.timeout = ~0u;
network_offset = skb_network_offset(skb);
skb_pull(skb, network_offset);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c297e2a8e2a1..281c1bded1f6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -291,17 +291,18 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta
{
struct qdisc_rate_table *rtab;
+ if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+ nla_len(tab) != TC_RTAB_SIZE)
+ return NULL;
+
for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
- if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
+ if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
+ !memcmp(&rtab->data, nla_data(tab), 1024)) {
rtab->refcnt++;
return rtab;
}
}
- if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
- nla_len(tab) != TC_RTAB_SIZE)
- return NULL;
-
rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
if (rtab) {
rtab->rate = *r;
@@ -971,13 +972,13 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
* Delete/get qdisc.
*/
-static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm = nlmsg_data(n);
struct nlattr *tca[TCA_MAX + 1];
struct net_device *dev;
- u32 clid = tcm->tcm_parent;
+ u32 clid;
struct Qdisc *q = NULL;
struct Qdisc *p = NULL;
int err;
@@ -985,14 +986,15 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
return -EPERM;
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
if (err < 0)
return err;
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ clid = tcm->tcm_parent;
if (clid) {
if (clid != TC_H_ROOT) {
if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
@@ -1038,7 +1040,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
* Create/change qdisc.
*/
-static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm;
@@ -1053,6 +1055,10 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
replay:
/* Reinit, just in case something touches this. */
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ if (err < 0)
+ return err;
+
tcm = nlmsg_data(n);
clid = tcm->tcm_parent;
q = p = NULL;
@@ -1061,9 +1067,6 @@ replay:
if (!dev)
return -ENODEV;
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
- if (err < 0)
- return err;
if (clid) {
if (clid != TC_H_ROOT) {
@@ -1372,7 +1375,7 @@ done:
-static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct tcmsg *tcm = nlmsg_data(n);
@@ -1382,22 +1385,22 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
const struct Qdisc_class_ops *cops;
unsigned long cl = 0;
unsigned long new_cl;
- u32 portid = tcm->tcm_parent;
- u32 clid = tcm->tcm_handle;
- u32 qid = TC_H_MAJ(clid);
+ u32 portid;
+ u32 clid;
+ u32 qid;
int err;
if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
return -EPERM;
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
if (err < 0)
return err;
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
/*
parent == TC_H_UNSPEC - unspecified parent.
parent == TC_H_ROOT - class is root, which has no parent.
@@ -1413,6 +1416,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
/* Step 1. Determine qdisc handle X:0 */
+ portid = tcm->tcm_parent;
+ clid = tcm->tcm_handle;
+ qid = TC_H_MAJ(clid);
+
if (portid != TC_H_ROOT) {
u32 qid1 = TC_H_MAJ(portid);
@@ -1636,7 +1643,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
int t, s_t;
- if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return 0;
dev = dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index cc37dd52ecf9..ef53ab8d0aae 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -80,7 +80,7 @@ struct choke_sched_data {
/* deliver a random number between 0 and N - 1 */
static u32 random_N(unsigned int N)
{
- return reciprocal_divide(random32(), N);
+ return reciprocal_divide(prandom_u32(), N); /* only the random source call changes: random32() becomes prandom_u32() */
}
/* number of elements in queue including holes */
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index eac7e0ee23c1..20224086cc28 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -898,14 +898,16 @@ void dev_shutdown(struct net_device *dev)
WARN_ON(timer_pending(&dev->watchdog_timer));
}
-void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate)
+void psched_ratecfg_precompute(struct psched_ratecfg *r,
+ const struct tc_ratespec *conf)
{
u64 factor;
u64 mult;
int shift;
- r->rate_bps = (u64)rate << 3;
- r->shift = 0;
+ memset(r, 0, sizeof(*r));
+ r->overhead = conf->overhead;
+ r->rate_bps = (u64)conf->rate << 3;
r->mult = 1;
/*
* Calibrate mult, shift so that token counting is accurate
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 571f1d211f4d..adaedd79389c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -109,7 +109,7 @@ struct htb_class {
} un;
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
struct rb_node pq_node; /* node for event queue */
- psched_time_t pq_key;
+ s64 pq_key;
int prio_activity; /* for which prios are we active */
enum htb_cmode cmode; /* current mode of the class */
@@ -121,10 +121,10 @@ struct htb_class {
/* token bucket parameters */
struct psched_ratecfg rate;
struct psched_ratecfg ceil;
- s64 buffer, cbuffer; /* token bucket depth/rate */
- psched_tdiff_t mbuffer; /* max wait time */
- s64 tokens, ctokens; /* current number of tokens */
- psched_time_t t_c; /* checkpoint time */
+ s64 buffer, cbuffer; /* token bucket depth/rate */
+ s64 mbuffer; /* max wait time */
+ s64 tokens, ctokens; /* current number of tokens */
+ s64 t_c; /* checkpoint time */
};
struct htb_sched {
@@ -141,15 +141,15 @@ struct htb_sched {
struct rb_root wait_pq[TC_HTB_MAXDEPTH];
/* time of nearest event per level (row) */
- psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
+ s64 near_ev_cache[TC_HTB_MAXDEPTH];
int defcls; /* class where unclassified flows go to */
/* filters for qdisc itself */
struct tcf_proto *filter_list;
- int rate2quantum; /* quant = rate / rate2quantum */
- psched_time_t now; /* cached dequeue time */
+ int rate2quantum; /* quant = rate / rate2quantum */
+ s64 now; /* cached dequeue time */
struct qdisc_watchdog watchdog;
/* non shaped skbs; let them go directly thru */
@@ -664,8 +664,8 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
* next pending event (0 for no event in pq, q->now for too many events).
* Note: Applied are events whose have cl->pq_key <= q->now.
*/
-static psched_time_t htb_do_events(struct htb_sched *q, int level,
- unsigned long start)
+static s64 htb_do_events(struct htb_sched *q, int level,
+ unsigned long start)
{
/* don't run for longer than 2 jiffies; 2 is used instead of
* 1 to simplify things when jiffy is going to be incremented
@@ -857,7 +857,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
struct sk_buff *skb;
struct htb_sched *q = qdisc_priv(sch);
int level;
- psched_time_t next_event;
+ s64 next_event;
unsigned long start_at;
/* try to dequeue direct packets as high prio (!) to minimize cpu work */
@@ -880,7 +880,7 @@ ok:
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
- psched_time_t event;
+ s64 event;
if (q->now >= q->near_ev_cache[level]) {
event = htb_do_events(q, level, start_at);
@@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
[TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
[TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+ [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
};
static void htb_work_func(struct work_struct *work)
@@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work)
static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
struct htb_sched *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_HTB_INIT + 1];
+ struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_glob *gopt;
int err;
int i;
@@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
+ err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
if (err < 0)
return err;
- if (tb[TCA_HTB_INIT] == NULL) {
- pr_err("HTB: hey probably you have bad tc tool ?\n");
+ if (!tb[TCA_HTB_INIT])
return -EINVAL;
- }
+
gopt = nla_data(tb[TCA_HTB_INIT]);
- if (gopt->version != HTB_VER >> 16) {
- pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
- HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
+ if (gopt->version != HTB_VER >> 16)
return -EINVAL;
- }
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
@@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
INIT_WORK(&q->work, htb_work_func);
skb_queue_head_init(&q->direct_queue);
- q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
- if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
- q->direct_qlen = 2;
-
+ if (tb[TCA_HTB_DIRECT_QLEN])
+ q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
+ else {
+ q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
+ if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+ q->direct_qlen = 2;
+ }
if ((q->rate2quantum = gopt->rate2quantum) < 1)
q->rate2quantum = 1;
q->defcls = gopt->defcls;
@@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt))
+ if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
+ nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
goto nla_put_failure;
nla_nest_end(skb, nest);
@@ -1089,9 +1090,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
memset(&opt, 0, sizeof(opt));
- opt.rate.rate = psched_ratecfg_getrate(&cl->rate);
+ psched_ratecfg_getrate(&opt.rate, &cl->rate);
opt.buffer = PSCHED_NS2TICKS(cl->buffer);
- opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil);
+ psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
opt.quantum = cl->quantum;
opt.prio = cl->prio;
@@ -1116,8 +1117,8 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
if (!cl->level && cl->un.leaf.q)
cl->qstats.qlen = cl->un.leaf.q->q.qlen;
- cl->xstats.tokens = cl->tokens;
- cl->xstats.ctokens = cl->ctokens;
+ cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
+ cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
@@ -1199,7 +1200,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
- parent->t_c = psched_get_time();
+ parent->t_c = ktime_to_ns(ktime_get());
parent->cmode = HTB_CAN_SEND;
}
@@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)*arg, *parent;
struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[__TCA_HTB_MAX];
+ struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_opt *hopt;
/* extract all subattrs from opt attr */
@@ -1416,8 +1417,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* set class to be in HTB_CAN_SEND state */
cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
- cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
- cl->t_c = psched_get_time();
+ cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
+ cl->t_c = ktime_to_ns(ktime_get());
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
@@ -1458,8 +1459,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->prio = TC_HTB_NUMPRIO - 1;
}
- psched_ratecfg_precompute(&cl->rate, hopt->rate.rate);
- psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate);
+ psched_ratecfg_precompute(&cl->rate, &hopt->rate);
+ psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
- cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
+ cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index c8388f3c3426..e478d316602b 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -298,9 +298,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->tokens = q->buffer;
q->ptokens = q->mtu;
- psched_ratecfg_precompute(&q->rate, rtab->rate.rate);
+ psched_ratecfg_precompute(&q->rate, &rtab->rate);
if (ptab) {
- psched_ratecfg_precompute(&q->peak, ptab->rate.rate);
+ psched_ratecfg_precompute(&q->peak, &ptab->rate);
q->peak_present = true;
} else {
q->peak_present = false;
@@ -350,9 +350,9 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
opt.limit = q->limit;
- opt.rate.rate = psched_ratecfg_getrate(&q->rate);
+ psched_ratecfg_getrate(&opt.rate, &q->rate);
if (q->peak_present)
- opt.peakrate.rate = psched_ratecfg_getrate(&q->peak);
+ psched_ratecfg_getrate(&opt.peakrate, &q->peak);
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
opt.mtu = PSCHED_NS2TICKS(q->mtu);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index d2709e2b7be6..91cfd8f94a19 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -66,13 +66,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work);
static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
-/* Keep track of the new idr low so that we don't re-use association id
- * numbers too fast. It is protected by they idr spin lock is in the
- * range of 1 - INT_MAX.
- */
-static u32 idr_low = 1;
-
-
/* 1st Level Abstractions. */
/* Initialize a new association from provided memory. */
@@ -104,8 +97,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initialize the object handling fields. */
atomic_set(&asoc->base.refcnt, 1);
- asoc->base.dead = 0;
- asoc->base.malloced = 0;
+ asoc->base.dead = false;
/* Initialize the bind addr area. */
sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
@@ -371,7 +363,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
if (!sctp_association_init(asoc, ep, sk, scope, gfp))
goto fail_init;
- asoc->base.malloced = 1;
SCTP_DBG_OBJCNT_INC(assoc);
SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc);
@@ -409,7 +400,7 @@ void sctp_association_free(struct sctp_association *asoc)
/* Mark as dead, so other users can know this structure is
* going away.
*/
- asoc->base.dead = 1;
+ asoc->base.dead = true;
/* Dispose of any data lying around in the outqueue. */
sctp_outq_free(&asoc->outqueue);
@@ -484,10 +475,8 @@ static void sctp_association_destroy(struct sctp_association *asoc)
WARN_ON(atomic_read(&asoc->rmem_alloc));
- if (asoc->base.malloced) {
- kfree(asoc);
- SCTP_DBG_OBJCNT_DEC(assoc);
- }
+ kfree(asoc);
+ SCTP_DBG_OBJCNT_DEC(assoc);
}
/* Change the primary destination address for the peer. */
@@ -1601,13 +1590,8 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
if (preload)
idr_preload(gfp);
spin_lock_bh(&sctp_assocs_id_lock);
- /* 0 is not a valid id, idr_low is always >= 1 */
- ret = idr_alloc(&sctp_assocs_id, asoc, idr_low, 0, GFP_NOWAIT);
- if (ret >= 0) {
- idr_low = ret + 1;
- if (idr_low == INT_MAX)
- idr_low = 1;
- }
+ /* 0 is not a valid assoc_id, must be >= 1 */
+ ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT);
spin_unlock_bh(&sctp_assocs_id_lock);
if (preload)
idr_preload_end();
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index d886b3bf84f5..41145fe31813 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
*/
void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port)
{
- bp->malloced = 0;
-
INIT_LIST_HEAD(&bp->address_list);
bp->port = port;
}
@@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
{
/* Empty the bind address list. */
sctp_bind_addr_clean(bp);
-
- if (bp->malloced) {
- kfree(bp);
- SCTP_DBG_OBJCNT_DEC(bind_addr);
- }
}
/* Add an address to the bind address list in the SCTP_bind_addr structure. */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 12ed45dbe75d..5fbd7bc6bb11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Initialize the basic object fields. */
atomic_set(&ep->base.refcnt, 1);
- ep->base.dead = 0;
- ep->base.malloced = 1;
+ ep->base.dead = false;
/* Create an input queue. */
sctp_inq_init(&ep->base.inqueue);
@@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp)
goto fail;
if (!sctp_endpoint_init(ep, sk, gfp))
goto fail_init;
- ep->base.malloced = 1;
+
SCTP_DBG_OBJCNT_INC(ep);
return ep;
@@ -234,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
*/
void sctp_endpoint_free(struct sctp_endpoint *ep)
{
- ep->base.dead = 1;
+ ep->base.dead = true;
ep->base.sk->sk_state = SCTP_SS_CLOSED;
@@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
if (ep->base.sk)
sock_put(ep->base.sk);
- /* Finally, free up our memory. */
- if (ep->base.malloced) {
- kfree(ep);
- SCTP_DBG_OBJCNT_DEC(ep);
- }
+ kfree(ep);
+ SCTP_DBG_OBJCNT_DEC(ep);
}
/* Hold a reference to an endpoint. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 2d5ad280de38..3221d073448c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue)
/* Create a task for delivering data. */
INIT_WORK(&queue->immediate, NULL);
-
- queue->malloced = 0;
}
/* Release the memory associated with an SCTP inqueue. */
@@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue)
sctp_chunk_free(queue->in_progress);
queue->in_progress = NULL;
}
-
- if (queue->malloced) {
- /* Dump the master memory segment. */
- kfree(queue);
- }
}
/* Put a new packet in an SCTP inqueue.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f5200a2ad852..bbef4a7a9b56 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
packet->overhead = overhead;
sctp_packet_reset(packet);
packet->vtag = 0;
- packet->malloced = 0;
+
return packet;
}
@@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet)
list_del_init(&chunk->list);
sctp_chunk_free(chunk);
}
-
- if (packet->malloced)
- kfree(packet);
}
/* This routine tries to append the chunk to the offered packet. If adding
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 01dca753db16..be35e2dbcc9a 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -206,6 +206,8 @@ static inline int sctp_cacc_skip(struct sctp_transport *primary,
*/
void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
{
+ memset(q, 0, sizeof(struct sctp_outq));
+
q->asoc = asoc;
INIT_LIST_HEAD(&q->out_chunk_list);
INIT_LIST_HEAD(&q->control_chunk_list);
@@ -213,13 +215,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
- q->fast_rtx = 0;
- q->outstanding_bytes = 0;
q->empty = 1;
- q->cork = 0;
-
- q->malloced = 0;
- q->out_qlen = 0;
}
/* Free the outqueue structure and any related pending chunks.
@@ -295,10 +291,6 @@ void sctp_outq_free(struct sctp_outq *q)
{
/* Throw away leftover chunks. */
__sctp_outq_teardown(q);
-
- /* If we were kmalloc()'d, free the memory. */
- if (q->malloced)
- kfree(q);
}
/* Put a new chunk in an sctp_outq. */
@@ -707,11 +699,10 @@ redo:
/* Cork the outqueue so queued chunks are really queued. */
int sctp_outq_uncork(struct sctp_outq *q)
{
- int error = 0;
if (q->cork)
q->cork = 0;
- error = sctp_outq_flush(q, 0);
- return error;
+
+ return sctp_outq_flush(q, 0);
}
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index ad0dba870341..e62c22535be4 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -63,7 +63,7 @@ static struct {
struct timespec tstart;
} sctpw;
-static void printl(const char *fmt, ...)
+static __printf(1, 2) void printl(const char *fmt, ...)
{
va_list args;
int len;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ab3bba8cb0a8..4e45ee35d0db 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -295,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
seq_printf(seq, " ASSOC SOCK STY SST ST HBKT "
"ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
"RPORT LADDRS <-> RADDRS "
- "HBINT INS OUTS MAXRT T1X T2X RTXC\n");
+ "HBINT INS OUTS MAXRT T1X T2X RTXC "
+ "wmema wmemq sndbuf rcvbuf\n");
return (void *)pos;
}
@@ -349,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sctp_seq_dump_local_addrs(seq, epb);
seq_printf(seq, "<-> ");
sctp_seq_dump_remote_addrs(seq, assoc);
- seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ",
+ seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
+ "%8d %8d %8d %8d",
assoc->hbinterval, assoc->c.sinit_max_instreams,
assoc->c.sinit_num_ostreams, assoc->max_retrans,
assoc->init_retries, assoc->shutdown_retries,
- assoc->rtx_data_chunks);
+ assoc->rtx_data_chunks,
+ atomic_read(&sk->sk_wmem_alloc),
+ sk->sk_wmem_queued,
+ sk->sk_sndbuf,
+ sk->sk_rcvbuf);
seq_printf(seq, "\n");
}
read_unlock(&head->lock);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1c2e46cb9191..eaee00c61139 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1403,7 +1403,7 @@ SCTP_STATIC __init int sctp_init(void)
/* Allocate and initialize the endpoint hash table. */
sctp_ep_hashsize = 64;
- sctp_ep_hashtable = (struct sctp_hashbucket *)
+ sctp_ep_hashtable =
kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
if (!sctp_ep_hashtable) {
pr_err("Failed endpoint_hash alloc\n");
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b9070736b8d9..6abb1caf9836 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk,
/* Make sure the destination port is correctly set
* in all addresses.
*/
- if (asoc && asoc->peer.port && asoc->peer.port != port)
+ if (asoc && asoc->peer.port && asoc->peer.port != port) {
+ err = -EINVAL;
goto out_free;
-
+ }
/* Check if there already is a matching association on the
* endpoint (other than the one created here).
@@ -4002,6 +4003,12 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
/* Release our hold on the endpoint. */
sp = sctp_sk(sk);
+ /* This could happen during socket init, thus we bail out
+ * early, since the rest of the below is not setup either.
+ */
+ if (sp->ep == NULL)
+ return;
+
if (sp->do_auto_asconf) {
sp->do_auto_asconf = 0;
list_del(&sp->auto_asconf_list);
@@ -6185,7 +6192,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Is there any exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index 825ea94415b3..da8603523808 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -74,7 +74,6 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out,
if (!sctp_ssnmap_init(retval, in, out))
goto fail_map;
- retval->malloced = 1;
SCTP_DBG_OBJCNT_INC(ssnmap);
return retval;
@@ -118,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map)
/* Dispose of a ssnmap. */
void sctp_ssnmap_free(struct sctp_ssnmap *map)
{
- if (map && map->malloced) {
- int size;
-
- size = sctp_ssnmap_size(map->in.len, map->out.len);
- if (size <= KMALLOC_MAX_SIZE)
- kfree(map);
- else
- free_pages((unsigned long)map, get_order(size));
- SCTP_DBG_OBJCNT_DEC(ssnmap);
- }
+ int size;
+
+ if (unlikely(!map))
+ return;
+
+ size = sctp_ssnmap_size(map->in.len, map->out.len);
+ if (size <= KMALLOC_MAX_SIZE)
+ kfree(map);
+ else
+ free_pages((unsigned long)map, get_order(size));
+
+ SCTP_DBG_OBJCNT_DEC(ssnmap);
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index fafd2a461ba0..098f1d5f769e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net,
if (!sctp_transport_init(net, transport, addr, gfp))
goto fail_init;
- transport->malloced = 1;
SCTP_DBG_OBJCNT_INC(transport);
return transport;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0fd5b3d2df03..04e3d470f877 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
skb_queue_head_init(&ulpq->reasm);
skb_queue_head_init(&ulpq->lobby);
ulpq->pd_mode = 0;
- ulpq->malloced = 0;
return ulpq;
}
@@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
void sctp_ulpq_free(struct sctp_ulpq *ulpq)
{
sctp_ulpq_flush(ulpq);
- if (ulpq->malloced)
- kfree(ulpq);
}
/* Process an incoming DATA chunk. */
diff --git a/net/socket.c b/net/socket.c
index 88f759adf3af..4ca1526db756 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,7 +600,7 @@ void sock_release(struct socket *sock)
}
EXPORT_SYMBOL(sock_release);
-int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
+void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
{
*tx_flags = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
@@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
*tx_flags |= SKBTX_SW_TSTAMP;
if (sock_flag(sk, SOCK_WIFI_STATUS))
*tx_flags |= SKBTX_WIFI_STATUS;
- return 0;
}
EXPORT_SYMBOL(sock_tx_timestamp);
@@ -682,16 +681,6 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_sendmsg);
-static int ktime2ts(ktime_t kt, struct timespec *ts)
-{
- if (kt.tv64) {
- *ts = ktime_to_timespec(kt);
- return 1;
- } else {
- return 0;
- }
-}
-
/*
* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
*/
@@ -724,17 +713,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
memset(ts, 0, sizeof(ts));
- if (skb->tstamp.tv64 &&
- sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
- skb_get_timestampns(skb, ts + 0);
+ if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
+ ktime_to_timespec_cond(skb->tstamp, ts + 0))
empty = 0;
- }
if (shhwtstamps) {
if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
- ktime2ts(shhwtstamps->syststamp, ts + 1))
+ ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
empty = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
- ktime2ts(shhwtstamps->hwtstamp, ts + 2))
+ ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
empty = 0;
}
if (!empty)
@@ -1173,15 +1160,6 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma)
static int sock_close(struct inode *inode, struct file *filp)
{
- /*
- * It was possible the inode is NULL we were
- * closing an unfinished socket.
- */
-
- if (!inode) {
- printk(KERN_DEBUG "sock_close: NULL inode\n");
- return 0;
- }
sock_release(SOCKET_I(inode));
return 0;
}
@@ -1978,7 +1956,7 @@ struct used_address {
unsigned int name_len;
};
-static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
struct used_address *used_address)
{
@@ -2093,22 +2071,30 @@ out:
* BSD sendmsg interface
*/
-SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
{
int fput_needed, err;
struct msghdr msg_sys;
- struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ struct socket *sock;
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
- err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
+ err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
fput_light(sock->file, fput_needed);
out:
return err;
}
+SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+{
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+ return __sys_sendmsg(fd, msg, flags);
+}
+
/*
* Linux sendmmsg interface
*/
@@ -2139,15 +2125,16 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
while (datagrams < vlen) {
if (MSG_CMSG_COMPAT & flags) {
- err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
- &msg_sys, flags, &used_address);
+ err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
+ &msg_sys, flags, &used_address);
if (err < 0)
break;
err = __put_user(err, &compat_entry->msg_len);
++compat_entry;
} else {
- err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
- &msg_sys, flags, &used_address);
+ err = ___sys_sendmsg(sock,
+ (struct msghdr __user *)entry,
+ &msg_sys, flags, &used_address);
if (err < 0)
break;
err = put_user(err, &entry->msg_len);
@@ -2171,10 +2158,12 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags)
{
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
return __sys_sendmmsg(fd, mmsg, vlen, flags);
}
-static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags, int nosec)
{
struct compat_msghdr __user *msg_compat =
@@ -2266,23 +2255,31 @@ out:
* BSD recvmsg interface
*/
-SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
- unsigned int, flags)
+long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
{
int fput_needed, err;
struct msghdr msg_sys;
- struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ struct socket *sock;
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
- err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+ err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
fput_light(sock->file, fput_needed);
out:
return err;
}
+SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
+ unsigned int, flags)
+{
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+ return __sys_recvmsg(fd, msg, flags);
+}
+
/*
* Linux recvmmsg interface
*/
@@ -2320,17 +2317,18 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
* No need to ask LSM for more than the first datagram.
*/
if (MSG_CMSG_COMPAT & flags) {
- err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
- &msg_sys, flags & ~MSG_WAITFORONE,
- datagrams);
+ err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
+ &msg_sys, flags & ~MSG_WAITFORONE,
+ datagrams);
if (err < 0)
break;
err = __put_user(err, &compat_entry->msg_len);
++compat_entry;
} else {
- err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
- &msg_sys, flags & ~MSG_WAITFORONE,
- datagrams);
+ err = ___sys_recvmsg(sock,
+ (struct msghdr __user *)entry,
+ &msg_sys, flags & ~MSG_WAITFORONE,
+ datagrams);
if (err < 0)
break;
err = put_user(err, &entry->msg_len);
@@ -2397,6 +2395,9 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
int datagrams;
struct timespec timeout_sys;
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+
if (!timeout)
return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
@@ -2434,7 +2435,7 @@ static const unsigned char nargs[21] = {
SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
- unsigned long a[6];
+ unsigned long a[AUDITSC_ARGS];
unsigned long a0, a1;
int err;
unsigned int len;
@@ -2450,7 +2451,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
if (copy_from_user(a, args, len))
return -EFAULT;
- audit_socketcall(nargs[call] / sizeof(unsigned long), a);
+ err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
+ if (err)
+ return err;
a0 = a[0];
a1 = a[1];
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 516fe2caac2c..241b54f30204 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -3,6 +3,7 @@ config SUNRPC
config SUNRPC_GSS
tristate
+ select OID_REGISTRY
config SUNRPC_BACKCHANNEL
bool
@@ -24,7 +25,6 @@ config SUNRPC_XPRT_RDMA
config SUNRPC_SWAP
bool
depends on SUNRPC
- select NETVM
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f5294047df77..ed2fdd210c0b 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -82,7 +82,7 @@ MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
static u32
pseudoflavor_to_flavor(u32 flavor) {
- if (flavor >= RPC_AUTH_MAXFLAVOR)
+ if (flavor > RPC_AUTH_MAXFLAVOR)
return RPC_AUTH_GSS;
return flavor;
}
@@ -124,6 +124,79 @@ rpcauth_unregister(const struct rpc_authops *ops)
EXPORT_SYMBOL_GPL(rpcauth_unregister);
/**
+ * rpcauth_get_pseudoflavor - check if security flavor is supported
+ * @flavor: a security flavor
+ * @info: a GSS mech OID, quality of protection, and service value
+ *
+ * Verifies that an appropriate kernel module is available or already loaded.
+ * Returns an equivalent pseudoflavor, or RPC_AUTH_MAXFLAVOR if "flavor" is
+ * not supported locally.
+ */
+rpc_authflavor_t
+rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
+{
+ const struct rpc_authops *ops;
+ rpc_authflavor_t pseudoflavor;
+
+ /* flavor is used as an index into auth_flavors[] below; apply the
+ * same range check rpcauth_get_gssinfo() performs before indexing,
+ * and report the flavor as unsupported.
+ */
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ return RPC_AUTH_MAXFLAVOR;
+
+ /* Not registered yet: try to load the module providing this flavor. */
+ ops = auth_flavors[flavor];
+ if (ops == NULL)
+ request_module("rpc-auth-%u", flavor);
+ /* Re-read the slot under the lock and take a module reference. */
+ spin_lock(&rpc_authflavor_lock);
+ ops = auth_flavors[flavor];
+ if (ops == NULL || !try_module_get(ops->owner)) {
+ spin_unlock(&rpc_authflavor_lock);
+ return RPC_AUTH_MAXFLAVOR;
+ }
+ spin_unlock(&rpc_authflavor_lock);
+
+ /* Without an info2flavor hook the flavor is its own pseudoflavor. */
+ pseudoflavor = flavor;
+ if (ops->info2flavor != NULL)
+ pseudoflavor = ops->info2flavor(info);
+
+ module_put(ops->owner);
+ return pseudoflavor;
+}
+EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
+
+/**
+ * rpcauth_get_gssinfo - find GSS tuple matching a GSS pseudoflavor
+ * @pseudoflavor: GSS pseudoflavor to match
+ * @info: rpcsec_gss_info structure to fill in
+ *
+ * Returns zero and fills in "info" if pseudoflavor matches a
+ * supported mechanism.
+ */
+int
+rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info)
+{
+ rpc_authflavor_t flavor = pseudoflavor_to_flavor(pseudoflavor);
+ const struct rpc_authops *ops;
+ int result;
+
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ return -EINVAL;
+
+ ops = auth_flavors[flavor];
+ if (ops == NULL)
+ request_module("rpc-auth-%u", flavor);
+ spin_lock(&rpc_authflavor_lock);
+ ops = auth_flavors[flavor];
+ if (ops == NULL || !try_module_get(ops->owner)) {
+ spin_unlock(&rpc_authflavor_lock);
+ return -ENOENT;
+ }
+ spin_unlock(&rpc_authflavor_lock);
+
+ result = -ENOENT;
+ if (ops->flavor2info != NULL)
+ result = ops->flavor2info(pseudoflavor, info);
+
+ module_put(ops->owner);
+ return result;
+}
+EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
+
+/**
* rpcauth_list_flavors - discover registered flavors and pseudoflavors
* @array: array to fill in
* @size: size of "array"
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 9e4cb59ef9f0..14e9e53e63d5 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,7 +5,8 @@
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-y := auth_gss.o gss_generic_token.o \
- gss_mech_switch.o svcauth_gss.o
+ gss_mech_switch.o svcauth_gss.o \
+ gss_rpc_upcall.o gss_rpc_xdr.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5257d2982ba5..fc2f78d6a9b4 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -52,6 +52,8 @@
#include <linux/sunrpc/gss_api.h>
#include <asm/uaccess.h>
+#include "../netns.h"
+
static const struct rpc_authops authgss_ops;
static const struct rpc_credops gss_credops;
@@ -85,8 +87,6 @@ struct gss_auth {
};
/* pipe_version >= 0 if and only if someone has a pipe open. */
-static int pipe_version = -1;
-static atomic_t pipe_users = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(pipe_version_lock);
static struct rpc_wait_queue pipe_version_rpc_waitqueue;
static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
@@ -238,7 +238,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
p = ERR_PTR(-EFAULT);
goto err;
}
- ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
+ ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
if (ret < 0) {
p = ERR_PTR(ret);
goto err;
@@ -266,24 +266,27 @@ struct gss_upcall_msg {
char databuf[UPCALL_BUF_LEN];
};
-static int get_pipe_version(void)
+static int get_pipe_version(struct net *net)
{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int ret;
spin_lock(&pipe_version_lock);
- if (pipe_version >= 0) {
- atomic_inc(&pipe_users);
- ret = pipe_version;
+ if (sn->pipe_version >= 0) {
+ atomic_inc(&sn->pipe_users);
+ ret = sn->pipe_version;
} else
ret = -EAGAIN;
spin_unlock(&pipe_version_lock);
return ret;
}
-static void put_pipe_version(void)
+static void put_pipe_version(struct net *net)
{
- if (atomic_dec_and_lock(&pipe_users, &pipe_version_lock)) {
- pipe_version = -1;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ if (atomic_dec_and_lock(&sn->pipe_users, &pipe_version_lock)) {
+ sn->pipe_version = -1;
spin_unlock(&pipe_version_lock);
}
}
@@ -291,9 +294,10 @@ static void put_pipe_version(void)
static void
gss_release_msg(struct gss_upcall_msg *gss_msg)
{
+ struct net *net = rpc_net_ns(gss_msg->auth->client);
if (!atomic_dec_and_test(&gss_msg->count))
return;
- put_pipe_version();
+ put_pipe_version(net);
BUG_ON(!list_empty(&gss_msg->list));
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
@@ -439,7 +443,10 @@ static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
struct rpc_clnt *clnt,
const char *service_name)
{
- if (pipe_version == 0)
+ struct net *net = rpc_net_ns(clnt);
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ if (sn->pipe_version == 0)
gss_encode_v0_msg(gss_msg);
else /* pipe_version == 1 */
gss_encode_v1_msg(gss_msg, clnt, service_name);
@@ -455,7 +462,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
if (gss_msg == NULL)
return ERR_PTR(-ENOMEM);
- vers = get_pipe_version();
+ vers = get_pipe_version(rpc_net_ns(clnt));
if (vers < 0) {
kfree(gss_msg);
return ERR_PTR(vers);
@@ -559,24 +566,34 @@ out:
static inline int
gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
{
+ struct net *net = rpc_net_ns(gss_auth->client);
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
struct rpc_cred *cred = &gss_cred->gc_base;
struct gss_upcall_msg *gss_msg;
+ unsigned long timeout;
DEFINE_WAIT(wait);
- int err = 0;
+ int err;
dprintk("RPC: %s for uid %u\n",
__func__, from_kuid(&init_user_ns, cred->cr_uid));
retry:
+ err = 0;
+ /* Default timeout is 15s unless we know that gssd is not running */
+ timeout = 15 * HZ;
+ if (!sn->gssd_running)
+ timeout = HZ >> 2;
gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
err = wait_event_interruptible_timeout(pipe_version_waitqueue,
- pipe_version >= 0, 15*HZ);
- if (pipe_version < 0) {
+ sn->pipe_version >= 0, timeout);
+ if (sn->pipe_version < 0) {
+ if (err == 0)
+ sn->gssd_running = 0;
warn_gssd();
err = -EACCES;
}
- if (err)
+ if (err < 0)
goto out;
goto retry;
}
@@ -707,20 +724,22 @@ out:
static int gss_pipe_open(struct inode *inode, int new_version)
{
+ struct net *net = inode->i_sb->s_fs_info;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int ret = 0;
spin_lock(&pipe_version_lock);
- if (pipe_version < 0) {
+ if (sn->pipe_version < 0) {
/* First open of any gss pipe determines the version: */
- pipe_version = new_version;
+ sn->pipe_version = new_version;
rpc_wake_up(&pipe_version_rpc_waitqueue);
wake_up(&pipe_version_waitqueue);
- } else if (pipe_version != new_version) {
+ } else if (sn->pipe_version != new_version) {
/* Trying to open a pipe of a different version */
ret = -EBUSY;
goto out;
}
- atomic_inc(&pipe_users);
+ atomic_inc(&sn->pipe_users);
out:
spin_unlock(&pipe_version_lock);
return ret;
@@ -740,6 +759,7 @@ static int gss_pipe_open_v1(struct inode *inode)
static void
gss_pipe_release(struct inode *inode)
{
+ struct net *net = inode->i_sb->s_fs_info;
struct rpc_pipe *pipe = RPC_I(inode)->pipe;
struct gss_upcall_msg *gss_msg;
@@ -758,7 +778,7 @@ restart:
}
spin_unlock(&pipe->lock);
- put_pipe_version();
+ put_pipe_version(net);
}
static void
@@ -867,8 +887,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
if (!gss_auth->mech) {
- printk(KERN_WARNING "%s: Pseudoflavor %d not found!\n",
- __func__, flavor);
+ dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
goto err_free;
}
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
@@ -1641,6 +1660,8 @@ static const struct rpc_authops authgss_ops = {
.pipes_create = gss_pipes_dentries_create,
.pipes_destroy = gss_pipes_dentries_destroy,
.list_pseudoflavors = gss_mech_list_pseudoflavors,
+ .info2flavor = gss_mech_info2flavor,
+ .flavor2info = gss_mech_flavor2info,
};
static const struct rpc_credops gss_credops = {
@@ -1733,6 +1754,7 @@ static void __exit exit_rpcsec_gss(void)
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
+MODULE_ALIAS("rpc-auth-6");
MODULE_LICENSE("GPL");
module_param_named(expired_cred_retry_delay,
gss_expired_cred_retry_delay,
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index d3611f11a8df..0d3c158ef8fa 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -679,6 +679,7 @@ out_err:
static int
gss_import_sec_context_kerberos(const void *p, size_t len,
struct gss_ctx *ctx_id,
+ time_t *endtime,
gfp_t gfp_mask)
{
const void *end = (const void *)((const char *)p + len);
@@ -694,9 +695,11 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
else
ret = gss_import_v2_context(p, end, ctx, gfp_mask);
- if (ret == 0)
+ if (ret == 0) {
ctx_id->internal_ctx_id = ctx;
- else
+ if (endtime)
+ *endtime = ctx->endtime;
+ } else
kfree(ctx);
dprintk("RPC: %s: returning %d\n", __func__, ret);
@@ -729,16 +732,19 @@ static const struct gss_api_ops gss_kerberos_ops = {
static struct pf_desc gss_kerberos_pfs[] = {
[0] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5,
+ .qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_NONE,
.name = "krb5",
},
[1] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5I,
+ .qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_INTEGRITY,
.name = "krb5i",
},
[2] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5P,
+ .qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_PRIVACY,
.name = "krb5p",
},
@@ -750,11 +756,12 @@ MODULE_ALIAS("rpc-auth-gss-krb5p");
MODULE_ALIAS("rpc-auth-gss-390003");
MODULE_ALIAS("rpc-auth-gss-390004");
MODULE_ALIAS("rpc-auth-gss-390005");
+MODULE_ALIAS("rpc-auth-gss-1.2.840.113554.1.2.2");
static struct gss_api_mech gss_kerberos_mech = {
.gm_name = "krb5",
.gm_owner = THIS_MODULE,
- .gm_oid = {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"},
+ .gm_oid = { 9, "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" },
.gm_ops = &gss_kerberos_ops,
.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
.gm_pfs = gss_kerberos_pfs,
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 88edec929d73..1da52d1406fc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen)
/* initialize to random value */
if (i == 0) {
- i = random32();
- i = (i << 32) | random32();
+ i = prandom_u32();
+ i = (i << 32) | prandom_u32();
}
switch (conflen) {
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index f0f4eee63a35..defa9d33925c 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -36,6 +36,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/oid_registry.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/gss_asn1.h>
#include <linux/sunrpc/auth_gss.h>
@@ -102,8 +103,13 @@ out:
return status;
}
-int
-gss_mech_register(struct gss_api_mech *gm)
+/**
+ * gss_mech_register - register a GSS mechanism
+ * @gm: GSS mechanism handle
+ *
+ * Returns zero if successful, or a negative errno.
+ */
+int gss_mech_register(struct gss_api_mech *gm)
{
int status;
@@ -116,11 +122,14 @@ gss_mech_register(struct gss_api_mech *gm)
dprintk("RPC: registered gss mechanism %s\n", gm->gm_name);
return 0;
}
-
EXPORT_SYMBOL_GPL(gss_mech_register);
-void
-gss_mech_unregister(struct gss_api_mech *gm)
+/**
+ * gss_mech_unregister - release a GSS mechanism
+ * @gm: GSS mechanism handle
+ *
+ */
+void gss_mech_unregister(struct gss_api_mech *gm)
{
spin_lock(&registered_mechs_lock);
list_del(&gm->gm_list);
@@ -128,18 +137,14 @@ gss_mech_unregister(struct gss_api_mech *gm)
dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name);
gss_mech_free(gm);
}
-
EXPORT_SYMBOL_GPL(gss_mech_unregister);
-struct gss_api_mech *
-gss_mech_get(struct gss_api_mech *gm)
+static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm)
{
__module_get(gm->gm_owner);
return gm;
}
-EXPORT_SYMBOL_GPL(gss_mech_get);
-
static struct gss_api_mech *
_gss_mech_get_by_name(const char *name)
{
@@ -169,12 +174,16 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name)
}
return gm;
}
-EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
-struct gss_api_mech *
-gss_mech_get_by_OID(struct xdr_netobj *obj)
+struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
{
struct gss_api_mech *pos, *gm = NULL;
+ char buf[32];
+
+ if (sprint_oid(obj->data, obj->len, buf, sizeof(buf)) < 0)
+ return NULL;
+ dprintk("RPC: %s(%s)\n", __func__, buf);
+ request_module("rpc-auth-gss-%s", buf);
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
@@ -188,11 +197,8 @@ gss_mech_get_by_OID(struct xdr_netobj *obj)
}
spin_unlock(&registered_mechs_lock);
return gm;
-
}
-EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
-
static inline int
mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
{
@@ -237,8 +243,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
return gm;
}
-EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
-
/**
* gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
* @array: array to fill in
@@ -268,19 +272,82 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
return i;
}
-u32
-gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
+/**
+ * gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor
+ * @gm: GSS mechanism handle
+ * @qop: GSS quality-of-protection value
+ * @service: GSS service value
+ *
+ * Returns a matching security flavor, or RPC_AUTH_MAXFLAVOR if none is found.
+ */
+rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 qop,
+ u32 service)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
- if (gm->gm_pfs[i].service == service) {
+ if (gm->gm_pfs[i].qop == qop &&
+ gm->gm_pfs[i].service == service) {
return gm->gm_pfs[i].pseudoflavor;
}
}
- return RPC_AUTH_MAXFLAVOR; /* illegal value */
+ return RPC_AUTH_MAXFLAVOR;
+}
+
+/**
+ * gss_mech_info2flavor - map a GSS tuple to an RPC pseudoflavor
+ * @info: GSS mechanism OID, quality of protection, and service value
+ *
+ * Looks the mechanism up by OID and asks it for the pseudoflavor that
+ * matches the qop/service pair. Returns RPC_AUTH_MAXFLAVOR when either
+ * the mechanism or the pair is not found.
+ */
+rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info)
+{
+	struct gss_api_mech *mech = gss_mech_get_by_OID(&info->oid);
+	rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
+
+	if (mech != NULL) {
+		flavor = gss_svc_to_pseudoflavor(mech, info->qop,
+						 info->service);
+		/* drop the reference taken by the OID lookup */
+		gss_mech_put(mech);
+	}
+	return flavor;
+}
+
+/**
+ * gss_mech_flavor2info - describe a pseudoflavor as a GSS tuple
+ * @pseudoflavor: GSS pseudoflavor to look up
+ * @info: rpcsec_gss_info structure that receives the OID, qop and service
+ *
+ * Returns zero after filling in "info" when a registered mechanism
+ * supports the pseudoflavor, or -ENOENT otherwise.
+ */
+int gss_mech_flavor2info(rpc_authflavor_t pseudoflavor,
+			 struct rpcsec_gss_info *info)
+{
+	struct gss_api_mech *mech;
+	int status = -ENOENT;
+	int idx;
+
+	mech = gss_mech_get_by_pseudoflavor(pseudoflavor);
+	if (!mech)
+		return -ENOENT;
+
+	for (idx = 0; idx < mech->gm_pf_num; idx++) {
+		if (mech->gm_pfs[idx].pseudoflavor != pseudoflavor)
+			continue;
+
+		info->oid.len = mech->gm_oid.len;
+		memcpy(info->oid.data, mech->gm_oid.data, mech->gm_oid.len);
+		info->qop = mech->gm_pfs[idx].qop;
+		info->service = mech->gm_pfs[idx].service;
+		status = 0;
+		break;
+	}
+
+	/* single exit: the lookup reference is dropped on every path */
+	gss_mech_put(mech);
+	return status;
}
-EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor);
u32
gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
@@ -294,8 +361,6 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
return 0;
}
-EXPORT_SYMBOL_GPL(gss_pseudoflavor_to_service);
-
char *
gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
{
@@ -308,8 +373,6 @@ gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
return NULL;
}
-EXPORT_SYMBOL_GPL(gss_service_to_auth_domain_name);
-
void
gss_mech_put(struct gss_api_mech * gm)
{
@@ -317,22 +380,21 @@ gss_mech_put(struct gss_api_mech * gm)
module_put(gm->gm_owner);
}
-EXPORT_SYMBOL_GPL(gss_mech_put);
-
/* The mech could probably be determined from the token instead, but it's just
* as easy for now to pass it in. */
int
gss_import_sec_context(const void *input_token, size_t bufsize,
struct gss_api_mech *mech,
struct gss_ctx **ctx_id,
+ time_t *endtime,
gfp_t gfp_mask)
{
if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
return -ENOMEM;
(*ctx_id)->mech_type = gss_mech_get(mech);
- return mech->gm_ops
- ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
+ return mech->gm_ops->gss_import_sec_context(input_token, bufsize,
+ *ctx_id, endtime, gfp_mask);
}
/* gss_get_mic: compute a mic over message and return mic_token. */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
new file mode 100644
index 000000000000..d304f41260f2
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -0,0 +1,358 @@
+/*
+ * linux/net/sunrpc/gss_rpc_upcall.c
+ *
+ * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/un.h>
+
+#include <linux/sunrpc/svcauth.h>
+#include "gss_rpc_upcall.h"
+
+#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock"
+
+#define GSSPROXY_PROGRAM (400112u)
+#define GSSPROXY_VERS_1 (1u)
+
+/*
+ * Encoding/Decoding functions
+ */
+
+enum {
+ GSSX_NULL = 0, /* Unused */
+ GSSX_INDICATE_MECHS = 1,
+ GSSX_GET_CALL_CONTEXT = 2,
+ GSSX_IMPORT_AND_CANON_NAME = 3,
+ GSSX_EXPORT_CRED = 4,
+ GSSX_IMPORT_CRED = 5,
+ GSSX_ACQUIRE_CRED = 6,
+ GSSX_STORE_CRED = 7,
+ GSSX_INIT_SEC_CONTEXT = 8,
+ GSSX_ACCEPT_SEC_CONTEXT = 9,
+ GSSX_RELEASE_HANDLE = 10,
+ GSSX_GET_MIC = 11,
+ GSSX_VERIFY = 12,
+ GSSX_WRAP = 13,
+ GSSX_UNWRAP = 14,
+ GSSX_WRAP_SIZE_LIMIT = 15,
+};
+
+#define PROC(proc, name) \
+[GSSX_##proc] = { \
+ .p_proc = GSSX_##proc, \
+ .p_encode = (kxdreproc_t)gssx_enc_##name, \
+ .p_decode = (kxdrdproc_t)gssx_dec_##name, \
+ .p_arglen = GSSX_ARG_##name##_sz, \
+ .p_replen = GSSX_RES_##name##_sz, \
+ .p_statidx = GSSX_##proc, \
+ .p_name = #proc, \
+}
+
+static struct rpc_procinfo gssp_procedures[] = {
+ PROC(INDICATE_MECHS, indicate_mechs),
+ PROC(GET_CALL_CONTEXT, get_call_context),
+ PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
+ PROC(EXPORT_CRED, export_cred),
+ PROC(IMPORT_CRED, import_cred),
+ PROC(ACQUIRE_CRED, acquire_cred),
+ PROC(STORE_CRED, store_cred),
+ PROC(INIT_SEC_CONTEXT, init_sec_context),
+ PROC(ACCEPT_SEC_CONTEXT, accept_sec_context),
+ PROC(RELEASE_HANDLE, release_handle),
+ PROC(GET_MIC, get_mic),
+ PROC(VERIFY, verify),
+ PROC(WRAP, wrap),
+ PROC(UNWRAP, unwrap),
+ PROC(WRAP_SIZE_LIMIT, wrap_size_limit),
+};
+
+
+
+/*
+ * Common transport functions
+ */
+
+static const struct rpc_program gssp_program;
+
+/*
+ * Create an RPC client that talks to gss-proxy over its AF_LOCAL socket
+ * (GSSPROXY_SOCK_PATHNAME).
+ *
+ * @net:   network namespace handed to rpc_create()
+ * @_clnt: receives the new client on success, NULL on failure
+ *
+ * Returns zero on success, or the negative errno encoded in the pointer
+ * returned by rpc_create().
+ */
+static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
+{
+	static const struct sockaddr_un gssp_localaddr = {
+		.sun_family		= AF_LOCAL,
+		.sun_path		= GSSPROXY_SOCK_PATHNAME,
+	};
+	struct rpc_create_args args = {
+		.net		= net,
+		.protocol	= XPRT_TRANSPORT_LOCAL,
+		.address	= (struct sockaddr *)&gssp_localaddr,
+		.addrsize	= sizeof(gssp_localaddr),
+		.servername	= "localhost",
+		.program	= &gssp_program,
+		.version	= GSSPROXY_VERS_1,
+		.authflavor	= RPC_AUTH_NULL,
+		/*
+		 * Note we want connection to be done in the caller's
+		 * filesystem namespace.  We therefore turn off the idle
+		 * timeout, which would result in reconnections being
+		 * done without the correct namespace:
+		 */
+		.flags		= RPC_CLNT_CREATE_NOPING |
+				  RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
+	};
+	struct rpc_clnt *clnt;
+
+	clnt = rpc_create(&args);
+	if (IS_ERR(clnt)) {
+		dprintk("RPC: failed to create AF_LOCAL gssproxy "
+				"client (errno %ld).\n", PTR_ERR(clnt));
+		*_clnt = NULL;
+		/*
+		 * PTR_ERR() already yields a negative errno; negating it
+		 * would hand callers a positive value on failure.
+		 */
+		return PTR_ERR(clnt);
+	}
+
+	dprintk("RPC: created new gssp local client (gssp_local_clnt: "
+			"%p)\n", clnt);
+	*_clnt = clnt;
+	return 0;
+}
+
+/*
+ * Initialise the gss-proxy fields of a sunrpc_net: no client yet, plus
+ * the mutex and wait queue that go with it.
+ */
+void init_gssp_clnt(struct sunrpc_net *sn)
+{
+	sn->gssp_clnt = NULL;
+	mutex_init(&sn->gssp_lock);
+	init_waitqueue_head(&sn->gssp_wq);
+}
+
+/*
+ * Create a fresh gss-proxy client for @net and install it in the per-net
+ * state, shutting down any client that was installed before. Waiters on
+ * gssp_wq are woken whether or not creation succeeded.
+ *
+ * Returns the result of gssp_rpc_create().
+ */
+int set_gssp_clnt(struct net *net)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	struct rpc_clnt *new_clnt;
+	int err;
+
+	mutex_lock(&sn->gssp_lock);
+	err = gssp_rpc_create(net, &new_clnt);
+	if (err == 0) {
+		struct rpc_clnt *old_clnt = sn->gssp_clnt;
+
+		if (old_clnt != NULL)
+			rpc_shutdown_client(old_clnt);
+		sn->gssp_clnt = new_clnt;
+	}
+	mutex_unlock(&sn->gssp_lock);
+
+	wake_up(&sn->gssp_wq);
+	return err;
+}
+
+/*
+ * Shut down and forget the per-net gss-proxy client, if one is installed.
+ */
+void clear_gssp_clnt(struct sunrpc_net *sn)
+{
+	struct rpc_clnt *clnt;
+
+	mutex_lock(&sn->gssp_lock);
+	clnt = sn->gssp_clnt;
+	if (clnt != NULL) {
+		rpc_shutdown_client(clnt);
+		sn->gssp_clnt = NULL;
+	}
+	mutex_unlock(&sn->gssp_lock);
+}
+
+/*
+ * Return the per-net gss-proxy client with its cl_count bumped, or NULL
+ * when none is installed. The lookup and the increment both happen under
+ * gssp_lock.
+ */
+static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
+{
+	struct rpc_clnt *ref = NULL;
+
+	mutex_lock(&sn->gssp_lock);
+	if (sn->gssp_clnt != NULL) {
+		ref = sn->gssp_clnt;
+		atomic_inc(&ref->cl_count);
+	}
+	mutex_unlock(&sn->gssp_lock);
+
+	return ref;
+}
+
+/*
+ * Issue one synchronous call to gss-proxy on the per-net client.
+ *
+ * Returns -EIO when no client is installed. Otherwise returns the
+ * rpc_call_sync() status, with a few negative codes remapped:
+ * -EPROTONOSUPPORT becomes -EINVAL; -ECONNREFUSED, -ETIMEDOUT and
+ * -ENOTCONN become -EAGAIN; -ERESTARTSYS becomes -EINTR when signalled()
+ * is true.
+ */
+static int gssp_call(struct net *net, struct rpc_message *msg)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	struct rpc_clnt *clnt = get_gssp_clnt(sn);
+	int rc;
+
+	if (clnt == NULL)
+		return -EIO;
+
+	rc = rpc_call_sync(clnt, msg, 0);
+	if (rc < 0) {
+		dprintk("gssp: rpc_call returned error %d\n", -rc);
+
+		if (rc == -EPROTONOSUPPORT)
+			rc = -EINVAL;
+		else if (rc == -ECONNREFUSED || rc == -ETIMEDOUT ||
+			 rc == -ENOTCONN)
+			rc = -EAGAIN;
+		else if (rc == -ERESTARTSYS && signalled())
+			rc = -EINTR;
+	}
+
+	/* drop the reference taken by get_gssp_clnt() */
+	rpc_release_client(clnt);
+	return rc;
+}
+
+
+/*
+ * Public functions
+ */
+
+/* numbers somewhat arbitrary but large enough for current needs */
+#define GSSX_MAX_OUT_HANDLE 128
+#define GSSX_MAX_SRC_PRINC 256
+#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \
+ GSSX_max_oid_sz + \
+ GSSX_max_princ_sz + \
+ sizeof(struct svc_cred))
+
+/*
+ * Send a GSSX_ACCEPT_SEC_CONTEXT call to gss-proxy.
+ *
+ * @net:  namespace whose gss-proxy client is used
+ * @data: in_handle and in_token are sent; major_status, minor_status,
+ *        out_handle, out_token, mech_oid, creds and found_creds are
+ *        filled in from whatever part of the reply was decoded
+ *
+ * The reply structures are inspected even when the call failed, so that
+ * buffers allocated by the XDR decoder are either handed over to @data
+ * or freed here.
+ *
+ * Returns the value of gssp_call().
+ */
+int gssp_accept_sec_context_upcall(struct net *net,
+				struct gssp_upcall_data *data)
+{
+	struct gssx_ctx ctxh = {
+		.state = data->in_handle
+	};
+	struct gssx_arg_accept_sec_context arg = {
+		.input_token = data->in_token,
+	};
+	struct gssx_ctx rctxh = {
+		/*
+		 * pass in the max length we expect for each of these
+		 * buffers but let the xdr code kmalloc them:
+		 */
+		.exported_context_token.len = GSSX_max_output_handle_sz,
+		.mech.len = GSS_OID_MAX_LEN,
+		.src_name.display_name.len = GSSX_max_princ_sz
+	};
+	struct gssx_res_accept_sec_context res = {
+		.context_handle = &rctxh,
+		.output_token = &data->out_token
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT],
+		.rpc_argp = &arg,
+		.rpc_resp = &res,
+		.rpc_cred = NULL, /* FIXME ? */
+	};
+	struct xdr_netobj client_name = { 0 , NULL };
+	int ret;
+
+	if (data->in_handle.len != 0)
+		arg.context_handle = &ctxh;
+	res.output_token->len = GSSX_max_output_token_sz;
+
+	/* use nfs/ for targ_name ? */
+
+	ret = gssp_call(net, &msg);
+
+	/* we need to fetch all data even in case of error so
+	 * that we can free special structures if they have been allocated */
+	data->major_status = res.status.major_status;
+	data->minor_status = res.status.minor_status;
+	if (res.context_handle) {
+		data->out_handle = rctxh.exported_context_token;
+		/*
+		 * mech.data stays NULL when decoding stopped before the
+		 * mech field; mech.len then still holds the maximum we
+		 * passed in, so it must not be used as a copy length.
+		 */
+		if (rctxh.mech.data) {
+			data->mech_oid.len = rctxh.mech.len;
+			memcpy(data->mech_oid.data, rctxh.mech.data,
+						data->mech_oid.len);
+		} else {
+			data->mech_oid.len = 0;
+		}
+		client_name = rctxh.src_name.display_name;
+	}
+	/* the OID was copied by value above; release the decoder's buffer */
+	kfree(rctxh.mech.data);
+
+	if (res.options.count == 1) {
+		gssx_buffer *value = &res.options.data[0].value;
+		/* Currently we only decode CREDS_VALUE, if we add
+		 * anything else we'll have to loop and match on the
+		 * option name */
+		if (value->len == 1) {
+			/* steal group info from struct svc_cred */
+			data->creds = *(struct svc_cred *)value->data;
+			data->found_creds = 1;
+		}
+		/* whether we use it or not, free data */
+		kfree(value->data);
+	}
+
+	if (res.options.count != 0) {
+		kfree(res.options.data);
+	}
+
+	/* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
+	if (data->found_creds && client_name.data != NULL) {
+		char *c;
+
+		data->creds.cr_principal = kstrndup(client_name.data,
+						client_name.len, GFP_KERNEL);
+		if (data->creds.cr_principal) {
+			/* terminate and remove realm part */
+			c = strchr(data->creds.cr_principal, '@');
+			if (c) {
+				*c = '\0';
+
+				/* change service-hostname delimiter */
+				c = strchr(data->creds.cr_principal, '/');
+				if (c)
+					*c = '@';
+			}
+			if (!c) {
+				/* not a service principal */
+				kfree(data->creds.cr_principal);
+				data->creds.cr_principal = NULL;
+			}
+		}
+	}
+	kfree(client_name.data);
+
+	return ret;
+}
+
+/*
+ * Release the buffers owned by a gssp_upcall_data: the in/out handles,
+ * the output token and the credential.
+ */
+void gssp_free_upcall_data(struct gssp_upcall_data *data)
+{
+	kfree(data->in_handle.data);
+	kfree(data->out_handle.data);
+	kfree(data->out_token.data);
+	/*
+	 * mech_oid.data is not freed: it is filled in with memcpy() and
+	 * never allocated, i.e. it is storage embedded in the structure.
+	 */
+	free_svc_cred(&data->creds);
+}
+
+/*
+ * Initialization stuff
+ */
+
+static const struct rpc_version gssp_version1 = {
+ .number = GSSPROXY_VERS_1,
+ .nrprocs = ARRAY_SIZE(gssp_procedures),
+ .procs = gssp_procedures,
+};
+
+static const struct rpc_version *gssp_version[] = {
+ NULL,
+ &gssp_version1,
+};
+
+static struct rpc_stat gssp_stats;
+
+static const struct rpc_program gssp_program = {
+ .name = "gssproxy",
+ .number = GSSPROXY_PROGRAM,
+ .nrvers = ARRAY_SIZE(gssp_version),
+ .version = gssp_version,
+ .stats = &gssp_stats,
+};
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
new file mode 100644
index 000000000000..1e542aded90a
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h
@@ -0,0 +1,48 @@
+/*
+ * linux/net/sunrpc/gss_rpc_upcall.h
+ *
+ * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _GSS_RPC_UPCALL_H
+#define _GSS_RPC_UPCALL_H
+
+#include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/auth_gss.h>
+#include "gss_rpc_xdr.h"
+#include "../netns.h"
+
+struct gssp_upcall_data {
+ struct xdr_netobj in_handle;
+ struct gssp_in_token in_token;
+ struct xdr_netobj out_handle;
+ struct xdr_netobj out_token;
+ struct rpcsec_gss_oid mech_oid;
+ struct svc_cred creds;
+ int found_creds;
+ int major_status;
+ int minor_status;
+};
+
+int gssp_accept_sec_context_upcall(struct net *net,
+ struct gssp_upcall_data *data);
+void gssp_free_upcall_data(struct gssp_upcall_data *data);
+
+void init_gssp_clnt(struct sunrpc_net *);
+int set_gssp_clnt(struct net *);
+void clear_gssp_clnt(struct sunrpc_net *);
+#endif /* _GSS_RPC_UPCALL_H */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
new file mode 100644
index 000000000000..357f613df7ff
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -0,0 +1,840 @@
+/*
+ * GSS Proxy upcall module
+ *
+ * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/sunrpc/svcauth.h>
+#include "gss_rpc_xdr.h"
+
+/*
+ * Encode @v as a 4-byte XDR boolean (xdr_one for non-zero, xdr_zero
+ * otherwise). Returns -ENOSPC when xdr_reserve_space() fails.
+ */
+static int gssx_enc_bool(struct xdr_stream *xdr, int v)
+{
+	__be32 *slot = xdr_reserve_space(xdr, 4);
+
+	if (unlikely(!slot))
+		return -ENOSPC;
+
+	if (v)
+		*slot = xdr_one;
+	else
+		*slot = xdr_zero;
+	return 0;
+}
+
+/*
+ * Decode one 4-byte XDR word into *@v in host byte order. Returns
+ * -ENOSPC when xdr_inline_decode() cannot supply the word.
+ */
+static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
+{
+	__be32 *word = xdr_inline_decode(xdr, 4);
+
+	if (unlikely(!word))
+		return -ENOSPC;
+
+	*v = be32_to_cpu(*word);
+	return 0;
+}
+
+/*
+ * Encode @buf as an XDR opaque: room is reserved for a u32 length plus
+ * buf->len bytes and filled by xdr_encode_opaque(). Returns -ENOSPC when
+ * the reservation fails.
+ */
+static int gssx_enc_buffer(struct xdr_stream *xdr,
+			   gssx_buffer *buf)
+{
+	__be32 *dst = xdr_reserve_space(xdr, sizeof(u32) + buf->len);
+
+	if (dst == NULL)
+		return -ENOSPC;
+
+	xdr_encode_opaque(dst, buf->data, buf->len);
+	return 0;
+}
+
+/*
+ * Encode the input token: a 4-byte length (in->page_len) followed by the
+ * token pages, which are attached to the stream with xdr_write_pages().
+ * Returns -ENOSPC when the length word cannot be reserved.
+ */
+static int gssx_enc_in_token(struct xdr_stream *xdr,
+			     struct gssp_in_token *in)
+{
+	__be32 *len_word = xdr_reserve_space(xdr, 4);
+
+	if (len_word == NULL)
+		return -ENOSPC;
+	*len_word = cpu_to_be32(in->page_len);
+
+	/* the payload itself lives in the pages; just hook them up */
+	xdr_write_pages(xdr, in->pages, in->page_base, in->page_len);
+	return 0;
+}
+
+
+/*
+ * Decode one XDR opaque into @buf.
+ *
+ * On entry buf->len is the largest length the caller accepts; zero means
+ * the opaque is consumed from the stream and discarded. When buf->data
+ * is NULL a copy is allocated with kmemdup(); otherwise the bytes are
+ * copied into the caller's storage. On success buf->len is the decoded
+ * length.
+ *
+ * Returns -ENOSPC when the stream is short or the opaque exceeds
+ * buf->len, -ENOMEM when the copy cannot be allocated.
+ */
+static int gssx_dec_buffer(struct xdr_stream *xdr,
+			   gssx_buffer *buf)
+{
+	u32 wire_len;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		return -ENOSPC;
+	wire_len = be32_to_cpup(p);
+
+	p = xdr_inline_decode(xdr, wire_len);
+	if (unlikely(!p))
+		return -ENOSPC;
+
+	/* caller is not interested in this buffer: already skipped */
+	if (!buf->len)
+		return 0;
+	if (wire_len > buf->len)
+		return -ENOSPC;
+
+	if (buf->data) {
+		memcpy(buf->data, p, wire_len);
+	} else {
+		buf->data = kmemdup(p, wire_len, GFP_KERNEL);
+		if (!buf->data)
+			return -ENOMEM;
+	}
+
+	buf->len = wire_len;
+	return 0;
+}
+
+/*
+ * Encode an option as two consecutive buffers: its name (opt->option)
+ * and its value (opt->value). Returns the first encoder error, if any.
+ */
+static int gssx_enc_option(struct xdr_stream *xdr,
+			   struct gssx_option *opt)
+{
+	int err = gssx_enc_buffer(xdr, &opt->option);
+
+	if (!err)
+		err = gssx_enc_buffer(xdr, &opt->value);
+	return err;
+}
+
+/*
+ * Decode an option as two consecutive buffers: its name (opt->option)
+ * and its value (opt->value). Returns the first decoder error, if any.
+ */
+static int gssx_dec_option(struct xdr_stream *xdr,
+			   struct gssx_option *opt)
+{
+	int err = gssx_dec_buffer(xdr, &opt->option);
+
+	if (!err)
+		err = gssx_dec_buffer(xdr, &opt->value);
+	return err;
+}
+
+/*
+ * Encode an empty option array, i.e. a single zero count word.
+ * Returns -EINVAL if @oa is not empty, -ENOSPC if the word cannot be
+ * reserved.
+ */
+static int dummy_enc_opt_array(struct xdr_stream *xdr,
+				struct gssx_option_array *oa)
+{
+	__be32 *count_word;
+
+	if (oa->count)
+		return -EINVAL;
+
+	count_word = xdr_reserve_space(xdr, 4);
+	if (count_word == NULL)
+		return -ENOSPC;
+
+	*count_word = 0;
+	return 0;
+}
+
+/*
+ * Consume an option array from the stream without keeping any of it.
+ *
+ * @oa is always reported as empty (count 0, data NULL). Each option is
+ * decoded into a zeroed scratch gssx_option; with both lengths zero,
+ * gssx_dec_buffer() skips the bytes instead of copying them.
+ *
+ * Returns zero on success, or the decoder error as soon as an option
+ * cannot be consumed. Stopping there matters: the count comes from the
+ * peer, and once the stream is exhausted every further attempt fails too.
+ */
+static int dummy_dec_opt_array(struct xdr_stream *xdr,
+				struct gssx_option_array *oa)
+{
+	struct gssx_option dummy;
+	u32 count, i;
+	__be32 *p;
+	int err;
+
+	oa->count = 0;
+	oa->data = NULL;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		return -ENOSPC;
+	count = be32_to_cpup(p);
+
+	memset(&dummy, 0, sizeof(dummy));
+	for (i = 0; i < count; i++) {
+		err = gssx_dec_option(xdr, &dummy);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy one s32 from the cursor *@p into *@res and advance the cursor.
+ * Returns -EINVAL, leaving the cursor alone, if the read would run past
+ * @max or the cursor arithmetic wrapped around.
+ */
+static int get_s32(void **p, void *max, s32 *res)
+{
+	char *cur = *p;
+	char *after = cur + sizeof(s32);
+
+	if (unlikely(after > (char *)max || after < cur))
+		return -EINVAL;
+
+	memcpy(res, cur, sizeof(s32));
+	*p = after;
+	return 0;
+}
+
+/*
+ * Decode the value of a CREDS_VALUE option into @creds.
+ *
+ * The value is an opaque holding host-order s32 words: uid, gid, the
+ * number N of supplementary gids, then N gids. ids are mapped through
+ * init_user_ns.
+ *
+ * Returns zero on success; -ENOSPC if the stream is short; -EINVAL if
+ * the opaque is too small for what it claims to hold or a supplementary
+ * gid is invalid; -ENOMEM if the group list cannot be allocated. On
+ * failure creds->cr_group_info is not left pointing at freed memory.
+ */
+static int gssx_dec_linux_creds(struct xdr_stream *xdr,
+				struct svc_cred *creds)
+{
+	u32 length;
+	__be32 *p;
+	void *q, *end;
+	s32 tmp;
+	int N, i, err;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		return -ENOSPC;
+
+	length = be32_to_cpup(p);
+
+	/* FIXME: we do not want to use the scratch buffer for this one
+	 * may need to use functions that allows us to access an io vector
+	 * directly */
+	p = xdr_inline_decode(xdr, length);
+	if (unlikely(p == NULL))
+		return -ENOSPC;
+
+	q = p;
+	end = q + length;
+
+	/* uid */
+	err = get_s32(&q, end, &tmp);
+	if (err)
+		return err;
+	creds->cr_uid = make_kuid(&init_user_ns, tmp);
+
+	/* gid */
+	err = get_s32(&q, end, &tmp);
+	if (err)
+		return err;
+	creds->cr_gid = make_kgid(&init_user_ns, tmp);
+
+	/* number of additional gid's */
+	err = get_s32(&q, end, &tmp);
+	if (err)
+		return err;
+	N = tmp;
+	/*
+	 * N comes off the wire: refuse a negative count, or one that
+	 * claims more gids than the rest of the opaque can hold, before
+	 * sizing an allocation from it.
+	 */
+	if (N < 0 ||
+	    (size_t)N > (size_t)((char *)end - (char *)q) / sizeof(s32))
+		return -EINVAL;
+	creds->cr_group_info = groups_alloc(N);
+	if (creds->cr_group_info == NULL)
+		return -ENOMEM;
+
+	/* gid's */
+	for (i = 0; i < N; i++) {
+		kgid_t kgid;
+		err = get_s32(&q, end, &tmp);
+		if (err)
+			goto out_free_groups;
+		err = -EINVAL;
+		kgid = make_kgid(&init_user_ns, tmp);
+		if (!gid_valid(kgid))
+			goto out_free_groups;
+		GROUP_AT(creds->cr_group_info, i) = kgid;
+	}
+
+	return 0;
+out_free_groups:
+	groups_free(creds->cr_group_info);
+	/* do not leave a dangling pointer behind in the caller's cred */
+	creds->cr_group_info = NULL;
+	return err;
+}
+
+/*
+ * Decode an option array, keeping only the CREDS_VALUE option.
+ *
+ * When the wire count is zero @oa is left untouched. Otherwise @oa gets
+ * exactly one entry whose value.data points at a kmalloc'd struct
+ * svc_cred; value.len is 1 if credentials were decoded into it and 0 if
+ * not. All other options are consumed and dropped.
+ *
+ * @oa is populated only after both allocations succeeded, so a caller
+ * never sees count == 1 together with a NULL or freed data pointer. If
+ * decoding fails later, @oa stays populated and the caller has to
+ * release it; gssp_accept_sec_context_upcall() frees res.options
+ * regardless of the call status.
+ *
+ * Returns zero on success, -ENOSPC on a short stream, -ENOMEM on
+ * allocation failure, or the error of a nested decoder.
+ */
+static int gssx_dec_option_array(struct xdr_stream *xdr,
+				 struct gssx_option_array *oa)
+{
+	struct gssx_option *opts;
+	struct svc_cred *creds;
+	u32 count, i;
+	__be32 *p;
+	int err;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		return -ENOSPC;
+	count = be32_to_cpup(p);
+	if (!count)
+		return 0;
+
+	opts = kmalloc(sizeof(struct gssx_option), GFP_KERNEL);
+	if (!opts)
+		return -ENOMEM;
+
+	creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
+	if (!creds) {
+		kfree(opts);
+		return -ENOMEM;
+	}
+
+	opts[0].option.data = CREDS_VALUE;
+	opts[0].option.len = sizeof(CREDS_VALUE);
+	opts[0].value.data = (void *)creds;
+	opts[0].value.len = 0;
+
+	/* we recognize only 1 currently: CREDS_VALUE */
+	oa->data = opts;
+	oa->count = 1;
+
+	for (i = 0; i < count; i++) {
+		gssx_buffer dummy = { 0, NULL };
+		u32 length;
+
+		/* option buffer */
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(p == NULL))
+			return -ENOSPC;
+
+		length = be32_to_cpup(p);
+		p = xdr_inline_decode(xdr, length);
+		if (unlikely(p == NULL))
+			return -ENOSPC;
+
+		if (length == sizeof(CREDS_VALUE) &&
+		    memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
+			/* We have creds here. parse them */
+			err = gssx_dec_linux_creds(xdr, creds);
+			if (err)
+				return err;
+			oa->data[0].value.len = 1; /* presence */
+		} else {
+			/* consume uninteresting buffer */
+			err = gssx_dec_buffer(xdr, &dummy);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Decode a gssx_status in wire order: major_status (hyper), mech,
+ * minor_status (hyper), major_status_string, minor_status_string,
+ * server_ctx, then an option array that is consumed and discarded.
+ *
+ * Returns -ENOSPC when a status word is missing, otherwise the first
+ * error from a nested decoder, or zero.
+ */
+static int gssx_dec_status(struct xdr_stream *xdr,
+			   struct gssx_status *status)
+{
+	__be32 *p;
+	int err;
+
+	p = xdr_inline_decode(xdr, 8);
+	if (unlikely(!p))
+		return -ENOSPC;
+	xdr_decode_hyper(p, &status->major_status);
+
+	err = gssx_dec_buffer(xdr, &status->mech);
+	if (err)
+		return err;
+
+	p = xdr_inline_decode(xdr, 8);
+	if (unlikely(!p))
+		return -ENOSPC;
+	xdr_decode_hyper(p, &status->minor_status);
+
+	/* three opaque fields back to back; stop at the first failure */
+	err = gssx_dec_buffer(xdr, &status->major_status_string);
+	if (!err)
+		err = gssx_dec_buffer(xdr, &status->minor_status_string);
+	if (!err)
+		err = gssx_dec_buffer(xdr, &status->server_ctx);
+	if (err)
+		return err;
+
+	/* no status options are understood for now: just skip them */
+	return dummy_dec_opt_array(xdr, &status->options);
+}
+
+/*
+ * Encode the call context that precedes every request.
+ *
+ * The locale and server_ctx buffers are taken from @ctx.  The options
+ * array is not taken from @ctx: exactly two fixed options are always sent,
+ * one asking for a lucid_v1 exported context and one asking for Linux
+ * user credentials.
+ *
+ * Returns 0 on success, -ENOSPC if the send buffer has no room for the
+ * option count, or the error reported by a nested encoder.
+ */
+static int gssx_enc_call_ctx(struct xdr_stream *xdr,
+				struct gssx_call_ctx *ctx)
+{
+	struct gssx_option opt;
+	__be32 *p;
+	int err;
+
+	/* ctx->locale */
+	err = gssx_enc_buffer(xdr, &ctx->locale);
+	if (err)
+		return err;
+
+	/* ctx->server_ctx */
+	err = gssx_enc_buffer(xdr, &ctx->server_ctx);
+	if (err)
+		return err;
+
+	/* we always want to ask for lucid contexts */
+	/* ctx->options */
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return -ENOSPC;	/* do not write through a NULL reservation */
+	*p = cpu_to_be32(2);
+
+	/* we want a lucid_v1 context */
+	opt.option.data = LUCID_OPTION;
+	opt.option.len = sizeof(LUCID_OPTION);
+	opt.value.data = LUCID_VALUE;
+	opt.value.len = sizeof(LUCID_VALUE);
+	err = gssx_enc_option(xdr, &opt);
+	if (err)
+		return err;	/* do not let the second option mask this failure */
+
+	/* ..and user creds */
+	opt.option.data = CREDS_OPTION;
+	opt.option.len = sizeof(CREDS_OPTION);
+	opt.value.data = CREDS_VALUE;
+	opt.value.len = sizeof(CREDS_VALUE);
+	err = gssx_enc_option(xdr, &opt);
+
+	return err;
+}
+
+/*
+ * Decode one name attribute: its attr buffer, its value buffer, and then
+ * its extensions array (which is only consumed via dummy_dec_opt_array()).
+ * Returns 0 or the first error reported by a nested decoder.
+ */
+static int gssx_dec_name_attr(struct xdr_stream *xdr,
+				struct gssx_name_attr *attr)
+{
+	int rc;
+
+	rc = gssx_dec_buffer(xdr, &attr->attr);
+	if (rc)
+		return rc;
+
+	rc = gssx_dec_buffer(xdr, &attr->value);
+	if (rc)
+		return rc;
+
+	return dummy_dec_opt_array(xdr, &attr->extensions);
+}
+
+/*
+ * Encode an empty name-attribute array.  Only an array with count == 0 is
+ * accepted; anything else is rejected with -EINVAL.  Returns -ENOSPC when
+ * the send buffer cannot hold the 4-byte element count.
+ */
+static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
+				struct gssx_name_attr_array *naa)
+{
+	__be32 *slot;
+
+	if (naa->count)
+		return -EINVAL;
+
+	slot = xdr_reserve_space(xdr, 4);
+	if (slot == NULL)
+		return -ENOSPC;
+
+	/* element count: zero */
+	*slot = 0;
+	return 0;
+}
+
+/*
+ * Consume a name-attribute array from the stream without keeping any of
+ * it: every element is decoded into a scratch object and @naa is always
+ * returned empty (count 0, data NULL).
+ *
+ * Returns 0 on success, -ENOSPC if the element count cannot be read, or
+ * the error from decoding an element.
+ */
+static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
+				struct gssx_name_attr_array *naa)
+{
+	/*
+	 * The scratch attribute is handed to the buffer/option decoders, so
+	 * it must not carry stack garbage.  NOTE(review): gssx_dec_buffer()
+	 * is defined outside this excerpt; presumably it inspects the
+	 * destination's len/data before copying - confirm.
+	 */
+	struct gssx_name_attr dummy = { .attr = { .len = 0 } };
+	u32 count, i;
+	__be32 *p;
+	int err;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		return -ENOSPC;
+	count = be32_to_cpup(p++);
+	for (i = 0; i < count; i++) {
+		/* stop at the first bad element instead of spinning on a
+		 * wire-supplied count after the stream has gone bad */
+		err = gssx_dec_name_attr(xdr, &dummy);
+		if (err)
+			return err;
+	}
+
+	naa->count = 0;
+	naa->data = NULL;
+	return 0;
+}
+
+/*
+ * Empty placeholder objects.  gssx_enc_name() encodes these for the name
+ * members that struct gssx_name does not carry.
+ */
+static struct xdr_netobj zero_netobj = {};
+
+static struct gssx_name_attr_array zero_name_attr_array = {};
+
+static struct gssx_option_array zero_option_array = {};
+
+/*
+ * Encode a gssx_name.  Only display_name comes from @name; name_type,
+ * exported_name and exported_composite_name are sent as empty buffers,
+ * and the attribute and extension arrays are sent empty.
+ * Returns 0 or the first error reported by a nested encoder.
+ */
+static int gssx_enc_name(struct xdr_stream *xdr,
+				struct gssx_name *name)
+{
+	int rc;
+	int i;
+
+	rc = gssx_enc_buffer(xdr, &name->display_name);
+	if (rc)
+		return rc;
+
+	/*
+	 * Three empty buffers in a row:
+	 * name_type, exported_name, exported_composite_name.
+	 */
+	for (i = 0; i < 3; i++) {
+		rc = gssx_enc_buffer(xdr, &zero_netobj);
+		if (rc)
+			return rc;
+	}
+
+	/* name_attributes: nothing to pass up yet */
+	rc = dummy_enc_nameattr_array(xdr, &zero_name_attr_array);
+	if (rc)
+		return rc;
+
+	/* extensions: nothing to pass up yet */
+	return dummy_enc_opt_array(xdr, &zero_option_array);
+}
+
+/*
+ * Decode a gssx_name.  Only display_name is stored in @name; name_type,
+ * exported_name, exported_composite_name, the attribute array and the
+ * extensions array are decoded into scratch objects and discarded.
+ *
+ * Returns 0 or the first error reported by a nested decoder.
+ */
+static int gssx_dec_name(struct xdr_stream *xdr,
+				struct gssx_name *name)
+{
+	/*
+	 * Scratch destinations are zeroed so the decoders never see stack
+	 * garbage in len/data/count.  NOTE(review): gssx_dec_buffer() and
+	 * dummy_dec_opt_array() are defined outside this excerpt; presumably
+	 * they read these fields before writing - confirm.
+	 */
+	struct xdr_netobj dummy_netobj = { .len = 0 };
+	struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 };
+	struct gssx_option_array dummy_option_array = { .count = 0 };
+	int err;
+
+	/* name->display_name */
+	err = gssx_dec_buffer(xdr, &name->display_name);
+	if (err)
+		return err;
+
+	/* name->name_type */
+	err = gssx_dec_buffer(xdr, &dummy_netobj);
+	if (err)
+		return err;
+
+	/* name->exported_name */
+	err = gssx_dec_buffer(xdr, &dummy_netobj);
+	if (err)
+		return err;
+
+	/* name->exported_composite_name */
+	err = gssx_dec_buffer(xdr, &dummy_netobj);
+	if (err)
+		return err;
+
+	/* we assume we have no attributes for now, so simply consume them */
+	/* name->name_attributes */
+	err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
+	if (err)
+		return err;
+
+	/* we assume we have no options for now, so simply consume them */
+	/* name->extensions */
+	err = dummy_dec_opt_array(xdr, &dummy_option_array);
+
+	return err;
+}
+
+/*
+ * Encode an empty credential-element array.  Only count == 0 is accepted
+ * (-EINVAL otherwise); -ENOSPC is returned when the send buffer cannot
+ * hold the 4-byte element count.
+ */
+static int dummy_enc_credel_array(struct xdr_stream *xdr,
+				struct gssx_cred_element_array *cea)
+{
+	__be32 *slot;
+
+	if (cea->count)
+		return -EINVAL;
+
+	slot = xdr_reserve_space(xdr, 4);
+	if (slot == NULL)
+		return -ENOSPC;
+
+	/* element count: zero */
+	*slot = 0;
+	return 0;
+}
+
+/*
+ * Encode a gssx_cred: desired_name, the (necessarily empty) elements
+ * array, cred_handle_reference and the needs_release boolean.
+ *
+ * Returns 0 or the first error reported by a nested encoder; in
+ * particular -EINVAL if cred->elements is not empty.
+ */
+static int gssx_enc_cred(struct xdr_stream *xdr,
+				struct gssx_cred *cred)
+{
+	int err;
+
+	/* cred->desired_name */
+	err = gssx_enc_name(xdr, &cred->desired_name);
+	if (err)
+		return err;
+
+	/* cred->elements */
+	err = dummy_enc_credel_array(xdr, &cred->elements);
+	if (err)
+		return err;	/* previously overwritten by the next call */
+
+	/* cred->cred_handle_reference */
+	err = gssx_enc_buffer(xdr, &cred->cred_handle_reference);
+	if (err)
+		return err;
+
+	/* cred->needs_release */
+	err = gssx_enc_bool(xdr, cred->needs_release);
+
+	return err;
+}
+
+/*
+ * Encode a gssx_ctx member by member, in declaration order.  lifetime and
+ * ctx_flags share one 16-byte reservation.
+ *
+ * Returns 0 on success, -ENOSPC if that reservation fails, or the first
+ * error reported by a nested encoder.
+ */
+static int gssx_enc_ctx(struct xdr_stream *xdr,
+				struct gssx_ctx *ctx)
+{
+	__be32 *wp;
+	int rc;
+
+	rc = gssx_enc_buffer(xdr, &ctx->exported_context_token);
+	if (rc)
+		return rc;
+
+	rc = gssx_enc_buffer(xdr, &ctx->state);
+	if (rc)
+		return rc;
+
+	rc = gssx_enc_bool(xdr, ctx->need_release);
+	if (rc)
+		return rc;
+
+	rc = gssx_enc_buffer(xdr, &ctx->mech);
+	if (rc)
+		return rc;
+
+	rc = gssx_enc_name(xdr, &ctx->src_name);
+	if (rc)
+		return rc;
+
+	rc = gssx_enc_name(xdr, &ctx->targ_name);
+	if (rc)
+		return rc;
+
+	/* two consecutive 64-bit values: lifetime, then ctx_flags */
+	wp = xdr_reserve_space(xdr, 8+8);
+	if (wp == NULL)
+		return -ENOSPC;
+	wp = xdr_encode_hyper(wp, ctx->lifetime);
+	xdr_encode_hyper(wp, ctx->ctx_flags);
+
+	rc = gssx_enc_bool(xdr, ctx->locally_initiated);
+	if (rc)
+		return rc;
+
+	rc = gssx_enc_bool(xdr, ctx->open);
+	if (rc)
+		return rc;
+
+	/* options: left to dummy_enc_opt_array(), nothing to pass up yet */
+	return dummy_enc_opt_array(xdr, &ctx->options);
+}
+
+/*
+ * Decode a gssx_ctx member by member, in declaration order.  lifetime and
+ * ctx_flags are pulled from one 16-byte window.
+ *
+ * Returns 0 on success, -ENOSPC if that window is not available, or the
+ * first error reported by a nested decoder.
+ */
+static int gssx_dec_ctx(struct xdr_stream *xdr,
+				struct gssx_ctx *ctx)
+{
+	__be32 *rp;
+	int rc;
+
+	rc = gssx_dec_buffer(xdr, &ctx->exported_context_token);
+	if (rc)
+		return rc;
+
+	rc = gssx_dec_buffer(xdr, &ctx->state);
+	if (rc)
+		return rc;
+
+	rc = gssx_dec_bool(xdr, &ctx->need_release);
+	if (rc)
+		return rc;
+
+	rc = gssx_dec_buffer(xdr, &ctx->mech);
+	if (rc)
+		return rc;
+
+	rc = gssx_dec_name(xdr, &ctx->src_name);
+	if (rc)
+		return rc;
+
+	rc = gssx_dec_name(xdr, &ctx->targ_name);
+	if (rc)
+		return rc;
+
+	/* two consecutive 64-bit values: lifetime, then ctx_flags */
+	rp = xdr_inline_decode(xdr, 8+8);
+	if (unlikely(!rp))
+		return -ENOSPC;
+	rp = xdr_decode_hyper(rp, &ctx->lifetime);
+	xdr_decode_hyper(rp, &ctx->ctx_flags);
+
+	rc = gssx_dec_bool(xdr, &ctx->locally_initiated);
+	if (rc)
+		return rc;
+
+	rc = gssx_dec_bool(xdr, &ctx->open);
+	if (rc)
+		return rc;
+
+	/* options are not interpreted yet; just consume them */
+	return dummy_dec_opt_array(xdr, &ctx->options);
+}
+
+/*
+ * Encode channel bindings: each address type goes out as a 64-bit value
+ * followed by the matching address buffer, then application_data.
+ *
+ * Returns 0 on success, -ENOSPC if an 8-byte reservation fails, or the
+ * first error reported by gssx_enc_buffer().
+ */
+static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
+{
+	__be32 *wp;
+	int rc;
+
+	/* initiator: address type, then address */
+	wp = xdr_reserve_space(xdr, 8);
+	if (wp == NULL)
+		return -ENOSPC;
+	xdr_encode_hyper(wp, cb->initiator_addrtype);
+
+	rc = gssx_enc_buffer(xdr, &cb->initiator_address);
+	if (rc)
+		return rc;
+
+	/* acceptor: address type, then address */
+	wp = xdr_reserve_space(xdr, 8);
+	if (wp == NULL)
+		return -ENOSPC;
+	xdr_encode_hyper(wp, cb->acceptor_addrtype);
+
+	rc = gssx_enc_buffer(xdr, &cb->acceptor_address);
+	if (rc)
+		return rc;
+
+	return gssx_enc_buffer(xdr, &cb->application_data);
+}
+
+/*
+ * Encode the arguments of an accept_sec_context request.
+ *
+ * context_handle, cred_handle and input_cb are optional pointers: when one
+ * is NULL a zero boolean is encoded in its place.  Encoding stops at the
+ * first failure; since the function returns void the error is only
+ * reported through dprintk().
+ */
+void gssx_enc_accept_sec_context(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 struct gssx_arg_accept_sec_context *arg)
+{
+	int err;
+
+	err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
+	if (err)
+		goto done;
+
+	/* arg->context_handle */
+	if (arg->context_handle)
+		err = gssx_enc_ctx(xdr, arg->context_handle);
+	else
+		err = gssx_enc_bool(xdr, 0);
+	/* checked for both branches: the "absent" marker can fail too */
+	if (err)
+		goto done;
+
+	/* arg->cred_handle */
+	if (arg->cred_handle)
+		err = gssx_enc_cred(xdr, arg->cred_handle);
+	else
+		err = gssx_enc_bool(xdr, 0);
+	if (err)
+		goto done;
+
+	/* arg->input_token */
+	err = gssx_enc_in_token(xdr, &arg->input_token);
+	if (err)
+		goto done;
+
+	/* arg->input_cb */
+	if (arg->input_cb)
+		err = gssx_enc_cb(xdr, arg->input_cb);
+	else
+		err = gssx_enc_bool(xdr, 0);
+	if (err)
+		goto done;
+
+	err = gssx_enc_bool(xdr, arg->ret_deleg_cred);
+	if (err)
+		goto done;
+
+	/* leave options empty for now, will add once we have any options
+	 * to pass up at all */
+	/* arg->options */
+	err = dummy_enc_opt_array(xdr, &arg->options);
+
+done:
+	if (err)
+		dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
+}
+
+/*
+ * Decode the reply to an accept_sec_context request.
+ *
+ * context_handle and output_token are each preceded on the wire by a
+ * boolean: when it is set the value is decoded into the object the
+ * pointer in @res already refers to, otherwise the pointer is set to
+ * NULL.  A reply that carries a delegated credential handle is rejected
+ * with -EINVAL.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct gssx_res_accept_sec_context *res)
+{
+	u32 present;
+	int rc;
+
+	rc = gssx_dec_status(xdr, &res->status);
+	if (rc)
+		return rc;
+
+	/* optional context handle */
+	rc = gssx_dec_bool(xdr, &present);
+	if (rc)
+		return rc;
+	if (!present) {
+		res->context_handle = NULL;
+	} else {
+		rc = gssx_dec_ctx(xdr, res->context_handle);
+		if (rc)
+			return rc;
+	}
+
+	/* optional output token */
+	rc = gssx_dec_bool(xdr, &present);
+	if (rc)
+		return rc;
+	if (!present) {
+		res->output_token = NULL;
+	} else {
+		rc = gssx_dec_buffer(xdr, res->output_token);
+		if (rc)
+			return rc;
+	}
+
+	/* delegated credentials: upcall servers sending these are not
+	 * supported */
+	rc = gssx_dec_bool(xdr, &present);
+	if (rc)
+		return rc;
+	if (present)
+		return -EINVAL;
+
+	return gssx_dec_option_array(xdr, &res->options);
+}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
new file mode 100644
index 000000000000..1c98b27d870c
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -0,0 +1,264 @@
+/*
+ * GSS Proxy upcall module
+ *
+ * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_GSS_RPC_XDR_H
+#define _LINUX_GSS_RPC_XDR_H
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprtsock.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_AUTH
+#endif
+
+#define LUCID_OPTION "exported_context_type"
+#define LUCID_VALUE "linux_lucid_v1"
+#define CREDS_OPTION "exported_creds_type"
+#define CREDS_VALUE "linux_creds_v1"
+
+/* All three protocol names below are aliases for struct xdr_netobj. */
+typedef struct xdr_netobj gssx_buffer;
+typedef struct xdr_netobj utf8string;
+typedef struct xdr_netobj gssx_OID;
+
+/* Credential usage values; GSSX_C_BOTH is the bitwise OR of the other two. */
+enum gssx_cred_usage {
+ GSSX_C_INITIATE = 1,
+ GSSX_C_ACCEPT = 2,
+ GSSX_C_BOTH = 3,
+};
+
+/* A single option: an identifying buffer plus its value buffer. */
+struct gssx_option {
+ gssx_buffer option;
+ gssx_buffer value;
+};
+
+/* Counted array of options: data points at count elements. */
+struct gssx_option_array {
+ u32 count;
+ struct gssx_option *data;
+};
+
+/*
+ * Status block returned with a reply.  The members are listed in the
+ * order in which they are decoded from the wire; the two status codes
+ * are 64-bit values.
+ */
+struct gssx_status {
+ u64 major_status;
+ gssx_OID mech;
+ u64 minor_status;
+ utf8string major_status_string;
+ utf8string minor_status_string;
+ gssx_buffer server_ctx;
+ struct gssx_option_array options;
+};
+
+/*
+ * Call context sent at the start of a request.  The encoder in
+ * gss_rpc_xdr.c sends locale and server_ctx from here but emits its own
+ * fixed pair of options rather than the contents of the options member.
+ */
+struct gssx_call_ctx {
+ utf8string locale;
+ gssx_buffer server_ctx;
+ struct gssx_option_array options;
+};
+
+/* One name attribute: attribute buffer, value buffer and extensions. */
+struct gssx_name_attr {
+ gssx_buffer attr;
+ gssx_buffer value;
+ struct gssx_option_array extensions;
+};
+
+/* Counted array of name attributes: data points at count elements. */
+struct gssx_name_attr_array {
+ u32 count;
+ struct gssx_name_attr *data;
+};
+
+/*
+ * In-kernel view of a name: only display_name is kept.  The remaining
+ * wire members (name_type, exported_name, exported_composite_name,
+ * name_attributes, extensions) are sent empty by the encoder and
+ * discarded by the decoder in gss_rpc_xdr.c.
+ */
+struct gssx_name {
+ gssx_buffer display_name;
+};
+typedef struct gssx_name gssx_name;
+
+/*
+ * One credential element.  NOTE(review): cred_usage presumably holds an
+ * enum gssx_cred_usage value - confirm against the protocol definition.
+ */
+struct gssx_cred_element {
+ gssx_name MN;
+ gssx_OID mech;
+ u32 cred_usage;
+ u64 initiator_time_rec;
+ u64 acceptor_time_rec;
+ struct gssx_option_array options;
+};
+
+/*
+ * Counted array of credential elements.  The encoder in gss_rpc_xdr.c
+ * only accepts count == 0.
+ */
+struct gssx_cred_element_array {
+ u32 count;
+ struct gssx_cred_element *data;
+};
+
+/*
+ * Credential handle.  Members are listed in encoding order;
+ * needs_release is encoded as a boolean.
+ */
+struct gssx_cred {
+ gssx_name desired_name;
+ struct gssx_cred_element_array elements;
+ gssx_buffer cred_handle_reference;
+ u32 needs_release;
+};
+
+/*
+ * Security context handle.  Members are listed in wire order.
+ * need_release, locally_initiated and open are encoded and decoded as
+ * booleans; lifetime and ctx_flags as 64-bit values.
+ */
+struct gssx_ctx {
+ gssx_buffer exported_context_token;
+ gssx_buffer state;
+ u32 need_release;
+ gssx_OID mech;
+ gssx_name src_name;
+ gssx_name targ_name;
+ u64 lifetime;
+ u64 ctx_flags;
+ u32 locally_initiated;
+ u32 open;
+ struct gssx_option_array options;
+};
+
+/*
+ * Channel bindings.  Members are listed in encoding order; the two
+ * address types are encoded as 64-bit values.
+ */
+struct gssx_cb {
+ u64 initiator_addrtype;
+ gssx_buffer initiator_address;
+ u64 acceptor_addrtype;
+ gssx_buffer acceptor_address;
+ gssx_buffer application_data;
+};
+
+
+/* This structure is not defined in the protocol.
+ * It is used in the kernel to carry around a big buffer
+ * as a set of pages, so the token does not have to be copied into one
+ * contiguous allocation. */
+struct gssp_in_token {
+ struct page **pages; /* Array of contiguous pages */
+ unsigned int page_base; /* Start of page data */
+ unsigned int page_len; /* Length of page data */
+};
+
+/*
+ * Arguments of an accept_sec_context request, listed in encoding order.
+ * context_handle, cred_handle and input_cb may be NULL, in which case the
+ * encoder emits a zero boolean in their place.
+ */
+struct gssx_arg_accept_sec_context {
+ struct gssx_call_ctx call_ctx;
+ struct gssx_ctx *context_handle;
+ struct gssx_cred *cred_handle;
+ struct gssp_in_token input_token;
+ struct gssx_cb *input_cb;
+ u32 ret_deleg_cred;
+ struct gssx_option_array options;
+};
+
+/*
+ * Result of an accept_sec_context request.  The decoder fills the objects
+ * that context_handle and output_token already point at when the reply
+ * carries them, and sets the pointer to NULL when it does not.
+ */
+struct gssx_res_accept_sec_context {
+ struct gssx_status status;
+ struct gssx_ctx *context_handle;
+ gssx_buffer *output_token;
+ /* struct gssx_cred *delegated_cred_handle; not used in kernel */
+ struct gssx_option_array options;
+};
+
+
+
+#define gssx_enc_indicate_mechs NULL
+#define gssx_dec_indicate_mechs NULL
+#define gssx_enc_get_call_context NULL
+#define gssx_dec_get_call_context NULL
+#define gssx_enc_import_and_canon_name NULL
+#define gssx_dec_import_and_canon_name NULL
+#define gssx_enc_export_cred NULL
+#define gssx_dec_export_cred NULL
+#define gssx_enc_import_cred NULL
+#define gssx_dec_import_cred NULL
+#define gssx_enc_acquire_cred NULL
+#define gssx_dec_acquire_cred NULL
+#define gssx_enc_store_cred NULL
+#define gssx_dec_store_cred NULL
+#define gssx_enc_init_sec_context NULL
+#define gssx_dec_init_sec_context NULL
+/*
+ * accept_sec_context is the only call with real XDR routines; the other
+ * calls in this list are defined as NULL.
+ */
+void gssx_enc_accept_sec_context(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct gssx_arg_accept_sec_context *args);
+int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct gssx_res_accept_sec_context *res);
+#define gssx_enc_release_handle NULL
+#define gssx_dec_release_handle NULL
+#define gssx_enc_get_mic NULL
+#define gssx_dec_get_mic NULL
+#define gssx_enc_verify NULL
+#define gssx_dec_verify NULL
+#define gssx_enc_wrap NULL
+#define gssx_dec_wrap NULL
+#define gssx_enc_unwrap NULL
+#define gssx_dec_unwrap NULL
+#define gssx_enc_wrap_size_limit NULL
+#define gssx_dec_wrap_size_limit NULL
+
+/* unimplemented calls are given a size of 0 */
+#define GSSX_ARG_indicate_mechs_sz 0
+#define GSSX_RES_indicate_mechs_sz 0
+#define GSSX_ARG_get_call_context_sz 0
+#define GSSX_RES_get_call_context_sz 0
+#define GSSX_ARG_import_and_canon_name_sz 0
+#define GSSX_RES_import_and_canon_name_sz 0
+#define GSSX_ARG_export_cred_sz 0
+#define GSSX_RES_export_cred_sz 0
+#define GSSX_ARG_import_cred_sz 0
+#define GSSX_RES_import_cred_sz 0
+#define GSSX_ARG_acquire_cred_sz 0
+#define GSSX_RES_acquire_cred_sz 0
+#define GSSX_ARG_store_cred_sz 0
+#define GSSX_RES_store_cred_sz 0
+#define GSSX_ARG_init_sec_context_sz 0
+#define GSSX_RES_init_sec_context_sz 0
+
+/*
+ * Size estimate for the accept_sec_context arguments, built up from one
+ * term per encoded member.
+ * NOTE(review): the sizeof() terms suggest these are byte counts -
+ * confirm against how the procedure table consumes them.
+ */
+#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \
+ 8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \
+ 8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE))
+#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \
+ 4 + 4 + 4)
+#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */
+#define GSSX_default_in_token_sz 4 /* does *not* include token data */
+#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */
+#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \
+ GSSX_default_in_ctx_hndl_sz + \
+ GSSX_default_in_cred_sz + \
+ GSSX_default_in_token_sz + \
+ GSSX_default_in_cb_sz + \
+ 4 /* no deleg creds boolean */ + \
+ 4) /* empty options */
+
+/* somewhat arbitrary numbers but large enough (we ignore some of the data
+ * sent down, but it is part of the protocol so we need enough space to take
+ * it in) */
+/* parenthesized so the sum stays intact inside any larger expression */
+#define GSSX_default_status_sz (8 + 24 + 8 + 256 + 256 + 16 + 4)
+#define GSSX_max_output_handle_sz 128
+#define GSSX_max_oid_sz 16
+#define GSSX_max_princ_sz 256
+#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \
+ 16 + 4 + GSSX_max_oid_sz + \
+ 2 * GSSX_max_princ_sz + \
+ 8 + 8 + 4 + 4 + 4)
+#define GSSX_max_output_token_sz 1024
+#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
+#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
+ GSSX_default_ctx_sz + \
+ GSSX_max_output_token_sz + \
+ 4 + GSSX_max_creds_sz)
+
+#define GSSX_ARG_release_handle_sz 0
+#define GSSX_RES_release_handle_sz 0
+#define GSSX_ARG_get_mic_sz 0
+#define GSSX_RES_get_mic_sz 0
+#define GSSX_ARG_verify_sz 0
+#define GSSX_RES_verify_sz 0
+#define GSSX_ARG_wrap_sz 0
+#define GSSX_RES_wrap_sz 0
+#define GSSX_ARG_unwrap_sz 0
+#define GSSX_RES_unwrap_sz 0
+#define GSSX_ARG_wrap_size_limit_sz 0
+#define GSSX_RES_wrap_size_limit_sz 0
+
+
+
+#endif /* _LINUX_GSS_RPC_XDR_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 5ead60550895..b05ace4c5f12 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -48,8 +48,8 @@
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/cache.h>
+#include "gss_rpc_upcall.h"
-#include "../netns.h"
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -497,7 +497,8 @@ static int rsc_parse(struct cache_detail *cd,
len = qword_get(&mesg, buf, mlen);
if (len < 0)
goto out;
- status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
+ status = gss_import_sec_context(buf, len, gm, &rsci.mechctx,
+ NULL, GFP_KERNEL);
if (status)
goto out;
@@ -505,8 +506,10 @@ static int rsc_parse(struct cache_detail *cd,
len = qword_get(&mesg, buf, mlen);
if (len > 0) {
rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL);
- if (!rsci.cred.cr_principal)
+ if (!rsci.cred.cr_principal) {
+ status = -ENOMEM;
goto out;
+ }
}
}
@@ -987,13 +990,10 @@ gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
}
static inline int
-gss_read_verf(struct rpc_gss_wire_cred *gc,
- struct kvec *argv, __be32 *authp,
- struct xdr_netobj *in_handle,
- struct xdr_netobj *in_token)
+gss_read_common_verf(struct rpc_gss_wire_cred *gc,
+ struct kvec *argv, __be32 *authp,
+ struct xdr_netobj *in_handle)
{
- struct xdr_netobj tmpobj;
-
/* Read the verifier; should be NULL: */
*authp = rpc_autherr_badverf;
if (argv->iov_len < 2 * 4)
@@ -1009,6 +1009,23 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
if (dup_netobj(in_handle, &gc->gc_ctx))
return SVC_CLOSE;
*authp = rpc_autherr_badverf;
+
+ return 0;
+}
+
+static inline int
+gss_read_verf(struct rpc_gss_wire_cred *gc,
+ struct kvec *argv, __be32 *authp,
+ struct xdr_netobj *in_handle,
+ struct xdr_netobj *in_token)
+{
+ struct xdr_netobj tmpobj;
+ int res;
+
+ res = gss_read_common_verf(gc, argv, authp, in_handle);
+ if (res)
+ return res;
+
if (svc_safe_getnetobj(argv, &tmpobj)) {
kfree(in_handle->data);
return SVC_DENIED;
@@ -1021,6 +1038,40 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
return 0;
}
+/* This depends heavily on how rqstp is set up by svc_recv and on how the
+ * server lays down pages when reading a request. We are basically
+ * guaranteed that the token lies linearly across a set of pages, starting
+ * at iov_base in rq_arg.head[0], which happens to be the first of the
+ * pages stored in rq_pages[].
+ * rq_arg.head[0].iov_base provides the page_base to pass to the upcall.
+ *
+ * Returns 0 on success.  On failure an SVC_* status is returned and
+ * in_handle holds no allocation the caller would need to free.
+ */
+static inline int
+gss_read_proxy_verf(struct svc_rqst *rqstp,
+			struct rpc_gss_wire_cred *gc, __be32 *authp,
+			struct xdr_netobj *in_handle,
+			struct gssp_in_token *in_token)
+{
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	u32 inlen;
+	int res;
+
+	res = gss_read_common_verf(gc, argv, authp, in_handle);
+	if (res)
+		return res;
+
+	/* the token length word itself must be present before reading it */
+	if (argv->iov_len < 4) {
+		kfree(in_handle->data);
+		return SVC_DENIED;
+	}
+
+	inlen = svc_getnl(argv);
+	if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) {
+		/* in_handle was duplicated by gss_read_common_verf();
+		 * release it here as gss_read_verf() does on its
+		 * SVC_DENIED path */
+		kfree(in_handle->data);
+		return SVC_DENIED;
+	}
+
+	in_token->pages = rqstp->rq_pages;
+	in_token->page_base = (ulong)argv->iov_base & ~PAGE_MASK;
+	in_token->page_len = inlen;
+
+	return 0;
+}
+
+
static inline int
gss_write_resv(struct kvec *resv, size_t size_limit,
struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
@@ -1048,7 +1099,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
* the upcall results are available, write the verifier and result.
* Otherwise, drop the request pending an answer to the upcall.
*/
-static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
+static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
struct rpc_gss_wire_cred *gc, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
@@ -1088,6 +1139,289 @@ out:
return ret;
}
+/*
+ * Record a context established through gss-proxy in the rsc cache.
+ *
+ * A fresh 64-bit handle is taken from a static counter, stored in *handle
+ * for the caller, and used as the cache key.  The credentials in @ud are
+ * moved (not copied) into the cache entry, and the mechanism context is
+ * imported from ud->out_handle.
+ *
+ * Returns 0 on success or a negative errno; on any failure no cache
+ * entry is updated.
+ */
+static int gss_proxy_save_rsc(struct cache_detail *cd,
+				struct gssp_upcall_data *ud,
+				uint64_t *handle)
+{
+	struct rsc rsci, *rscp = NULL;
+	static atomic64_t ctxhctr;
+	long long ctxh;
+	struct gss_api_mech *gm = NULL;
+	time_t expiry;
+	int status;
+
+	memset(&rsci, 0, sizeof(rsci));
+	/* context handle */
+	status = -ENOMEM;
+	/* the handle needs to be just a unique id,
+	 * use a static counter */
+	ctxh = atomic64_inc_return(&ctxhctr);
+
+	/* make a copy for the caller */
+	*handle = ctxh;
+
+	/* make a copy for the rsc cache */
+	if (dup_to_netobj(&rsci.handle, (char *)handle, sizeof(uint64_t)))
+		goto out;
+	rscp = rsc_lookup(cd, &rsci);
+	if (!rscp)
+		goto out;
+
+	/* creds */
+	if (!ud->found_creds) {
+		/*
+		 * userspace seems buggy, we should always get at least a
+		 * mapping to nobody.  Bail out like every other failure
+		 * here: no expiry time was obtained, so there is nothing
+		 * valid to store in a cache entry.
+		 */
+		dprintk("RPC: No creds found!\n");
+		status = -EINVAL;
+		goto out;
+	}
+
+	/* steal creds */
+	rsci.cred = ud->creds;
+	memset(&ud->creds, 0, sizeof(struct svc_cred));
+
+	status = -EOPNOTSUPP;
+	/* get mech handle from OID */
+	gm = gss_mech_get_by_OID(&ud->mech_oid);
+	if (!gm)
+		goto out;
+
+	/* mech-specific data: */
+	status = gss_import_sec_context(ud->out_handle.data,
+					ud->out_handle.len,
+					gm, &rsci.mechctx,
+					&expiry, GFP_KERNEL);
+	if (status)
+		goto out;
+
+	/* expiry was filled in by the import above */
+	rsci.h.expiry_time = expiry;
+	rscp = rsc_update(cd, &rsci, rscp);
+	status = 0;
+out:
+	gss_mech_put(gm);
+	rsc_free(&rsci);
+	if (rscp)
+		cache_put(&rscp->h, cd);
+	else
+		status = -ENOMEM;
+	return status;
+}
+
+/*
+ * Handle a context-establishment request through gss-proxy: read the
+ * verifier and token, perform the synchronous upcall, then write the
+ * verifier and result for the client.
+ *
+ * Returns the status from gss_read_proxy_verf() if reading fails,
+ * SVC_COMPLETE when the reply was written, and SVC_CLOSE otherwise.
+ */
+static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
+			struct rpc_gss_wire_cred *gc, __be32 *authp)
+{
+	struct net *net = rqstp->rq_xprt->xpt_net;
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	struct gssp_upcall_data ud;
+	struct xdr_netobj cli_handle;
+	uint64_t handle;
+	int ret;
+
+	memset(&ud, 0, sizeof(ud));
+	ret = gss_read_proxy_verf(rqstp, gc, authp,
+				  &ud.in_handle, &ud.in_token);
+	if (ret)
+		return ret;
+
+	/* every failure from here on closes the connection */
+	ret = SVC_CLOSE;
+
+	/* Perform synchronous upcall to gss-proxy */
+	if (gssp_accept_sec_context_upcall(net, &ud))
+		goto out;
+
+	dprintk("RPC: svcauth_gss: gss major status = %d\n",
+			ud.major_status);
+
+	if (ud.major_status == GSS_S_CONTINUE_NEEDED) {
+		/* more round trips needed: hand back the proxy's handle */
+		cli_handle = ud.out_handle;
+	} else if (ud.major_status == GSS_S_COMPLETE) {
+		/* established: cache it and hand back our own handle */
+		if (gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle))
+			goto out;
+		cli_handle.data = (u8 *)&handle;
+		cli_handle.len = sizeof(handle);
+	} else {
+		goto out;
+	}
+
+	/* Got an answer to the upcall; use it: */
+	if (gss_write_init_verf(sn->rsc_cache, rqstp,
+				&cli_handle, &ud.major_status))
+		goto out;
+	if (gss_write_resv(resv, PAGE_SIZE,
+			   &cli_handle, &ud.out_token,
+			   ud.major_status, ud.minor_status))
+		goto out;
+
+	ret = SVC_COMPLETE;
+out:
+	gssp_free_upcall_data(&ud);
+	return ret;
+}
+
+DEFINE_SPINLOCK(use_gssp_lock);
+
+/*
+ * Report whether this network namespace uses gss-proxy.
+ *
+ * sn->use_gss_proxy is -1 until a decision has been made.  The first
+ * caller to find it undecided settles it as 0 (legacy upcall).
+ */
+static bool use_gss_proxy(struct net *net)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	int mode;
+
+	if (sn->use_gss_proxy != -1)
+		return sn->use_gss_proxy;
+	spin_lock(&use_gssp_lock);
+	/*
+	 * If you wanted gss-proxy, you should have said so before
+	 * starting to accept requests.  Re-check under the lock so that a
+	 * set_gss_proxy() which won the race is not overwritten.
+	 */
+	if (sn->use_gss_proxy == -1)
+		sn->use_gss_proxy = 0;
+	mode = sn->use_gss_proxy;
+	spin_unlock(&use_gssp_lock);
+	return mode;
+}
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Settle the gss-proxy mode of a namespace as @type (0 or 1).
+ * Succeeds if the mode is still undecided or already equal to @type and
+ * returns -EBUSY if the other mode was chosen earlier.  Waiters on
+ * gssp_wq are woken in either case.
+ */
+static int set_gss_proxy(struct net *net, int type)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	int rc = 0;
+
+	WARN_ON_ONCE(!(type == 0 || type == 1));
+
+	spin_lock(&use_gssp_lock);
+	if (sn->use_gss_proxy != -1 && sn->use_gss_proxy != type)
+		rc = -EBUSY;
+	else
+		sn->use_gss_proxy = type;
+	spin_unlock(&use_gssp_lock);
+
+	wake_up(&sn->gssp_wq);
+	return rc;
+}
+
+/*
+ * True once the mode is settled: immediately for legacy mode (0), and
+ * for proxy mode (1) only once a gssp client is set.  An undecided mode
+ * (-1) is never ready; any other value triggers a one-time warning.
+ */
+static inline bool gssp_ready(struct sunrpc_net *sn)
+{
+	int mode = sn->use_gss_proxy;
+
+	if (mode == -1)
+		return false;
+	if (mode == 0)
+		return true;
+	if (mode == 1)
+		return sn->gssp_clnt;
+
+	WARN_ON_ONCE(1);
+	return false;
+}
+
+/*
+ * Wait until gssp_ready() holds for this namespace.  A non-blocking file
+ * gets -EAGAIN instead of sleeping; otherwise the result of the
+ * interruptible wait is returned.
+ */
+static int wait_for_gss_proxy(struct net *net, struct file *file)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+	if (file->f_flags & O_NONBLOCK) {
+		if (!gssp_ready(sn))
+			return -EAGAIN;
+	}
+
+	return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn));
+}
+
+
+/*
+ * Write handler for the use-gss-proxy proc file.  The only accepted
+ * input is a number equal to 1, written at offset 0 and shorter than the
+ * local buffer; it selects proxy mode and sets up the gssp client.
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t write_gssp(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	struct net *net = PDE_DATA(file_inode(file));
+	char kbuf[20];
+	unsigned long val;
+	int rc;
+
+	if (*ppos)
+		return -EINVAL;
+	if (count > sizeof(kbuf)-1)
+		return -EINVAL;
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+	kbuf[count] = 0;
+
+	rc = kstrtoul(kbuf, 0, &val);
+	if (rc)
+		return rc;
+	if (val != 1)
+		return -EINVAL;
+
+	rc = set_gss_proxy(net, 1);
+	if (rc)
+		return rc;
+
+	rc = set_gssp_clnt(net);
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+/*
+ * Read handler for the use-gss-proxy proc file.  Waits until the mode is
+ * settled, then returns it as a decimal number followed by a newline,
+ * honouring the file position for partial reads.
+ */
+static ssize_t read_gssp(struct file *file, char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	struct net *net = PDE_DATA(file_inode(file));
+	unsigned long pos = *ppos;
+	char text[10];
+	size_t avail;
+	int rc;
+
+	rc = wait_for_gss_proxy(net, file);
+	if (rc)
+		return rc;
+
+	snprintf(text, sizeof(text), "%d\n", use_gss_proxy(net));
+	avail = strlen(text);
+	if (pos >= avail)
+		return 0;	/* nothing left past the end of the text */
+
+	avail -= pos;
+	avail = (avail > count) ? count : avail;
+	if (copy_to_user(buf, (void *)(text+pos), avail))
+		return -EFAULT;
+
+	*ppos += avail;
+	return avail;
+}
+
+/* File operations of the "use-gss-proxy" proc entry. */
+static const struct file_operations use_gss_proxy_ops = {
+ .open = nonseekable_open,
+ .write = write_gssp,
+ .read = read_gssp,
+};
+
+/*
+ * Mark the namespace's gss-proxy mode as undecided (-1) and create its
+ * "use-gss-proxy" proc file, readable and writable by the owner only.
+ * Returns 0 on success or -ENOMEM if the proc entry cannot be created.
+ */
+static int create_use_gss_proxy_proc_entry(struct net *net)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+	sn->use_gss_proxy = -1;
+	sn->use_gssp_proc = proc_create_data("use-gss-proxy",
+					     S_IFREG|S_IRUSR|S_IWUSR,
+					     sn->proc_net_rpc,
+					     &use_gss_proxy_ops, net);
+	if (sn->use_gssp_proc == NULL)
+		return -ENOMEM;
+
+	init_gssp_clnt(sn);
+	return 0;
+}
+
+/*
+ * Remove the "use-gss-proxy" proc file and clear the gssp client.  Does
+ * nothing if the proc entry was never created.
+ */
+static void destroy_use_gss_proxy_proc_entry(struct net *net)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+	if (!sn->use_gssp_proc)
+		return;
+
+	remove_proc_entry("use-gss-proxy", sn->proc_net_rpc);
+	clear_gssp_clnt(sn);
+}
+#else /* CONFIG_PROC_FS */
+
+/* Without procfs there is no proc entry to create; always succeeds. */
+static int create_use_gss_proxy_proc_entry(struct net *net)
+{
+ return 0;
+}
+
+/* Without procfs there is nothing to tear down. */
+static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
+
+#endif /* CONFIG_PROC_FS */
+
/*
* Accept an rpcsec packet.
* If context establishment, punt to user space
@@ -1154,7 +1488,10 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
switch (gc->gc_proc) {
case RPC_GSS_PROC_INIT:
case RPC_GSS_PROC_CONTINUE_INIT:
- return svcauth_gss_handle_init(rqstp, gc, authp);
+ if (use_gss_proxy(SVC_NET(rqstp)))
+ return svcauth_gss_proxy_init(rqstp, gc, authp);
+ else
+ return svcauth_gss_legacy_init(rqstp, gc, authp);
case RPC_GSS_PROC_DATA:
case RPC_GSS_PROC_DESTROY:
/* Look up the context, and check the verifier: */
@@ -1220,7 +1557,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
svcdata->rsci = rsci;
cache_get(&rsci->h);
rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor(
- rsci->mechctx->mech_type, gc->gc_svc);
+ rsci->mechctx->mech_type,
+ GSS_C_QOP_DEFAULT,
+ gc->gc_svc);
ret = SVC_OK;
goto out;
}
@@ -1529,7 +1868,12 @@ gss_svc_init_net(struct net *net)
rv = rsi_cache_create_net(net);
if (rv)
goto out1;
+ rv = create_use_gss_proxy_proc_entry(net);
+ if (rv)
+ goto out2;
return 0;
+out2:
+ destroy_use_gss_proxy_proc_entry(net);
out1:
rsc_cache_destroy_net(net);
return rv;
@@ -1538,6 +1882,7 @@ out1:
void
gss_svc_shutdown_net(struct net *net)
{
+ destroy_use_gss_proxy_proc_entry(net);
rsi_cache_destroy_net(net);
rsc_cache_destroy_net(net);
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 25d58e766014..80fe5c86efd1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -986,8 +986,10 @@ static int cache_open(struct inode *inode, struct file *filp,
nonseekable_open(inode, filp);
if (filp->f_mode & FMODE_READ) {
rp = kmalloc(sizeof(*rp), GFP_KERNEL);
- if (!rp)
+ if (!rp) {
+ module_put(cd->owner);
return -ENOMEM;
+ }
rp->offset = 0;
rp->q.reader = 1;
atomic_inc(&cd->readers);
@@ -1208,7 +1210,6 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall);
* key and content are both parsed by cache
*/
-#define isodigit(c) (isdigit(c) && c <= '7')
int qword_get(char **bpp, char *dest, int bufsize)
{
/* return bytes copied, or -1 on error */
@@ -1461,7 +1462,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE(file_inode(filp))->data;
+ struct cache_detail *cd = PDE_DATA(file_inode(filp));
return cache_read(filp, buf, count, ppos, cd);
}
@@ -1469,14 +1470,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE(file_inode(filp))->data;
+ struct cache_detail *cd = PDE_DATA(file_inode(filp));
return cache_write(filp, buf, count, ppos, cd);
}
static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
{
- struct cache_detail *cd = PDE(file_inode(filp))->data;
+ struct cache_detail *cd = PDE_DATA(file_inode(filp));
return cache_poll(filp, wait, cd);
}
@@ -1485,21 +1486,21 @@ static long cache_ioctl_procfs(struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct cache_detail *cd = PDE(inode)->data;
+ struct cache_detail *cd = PDE_DATA(inode);
return cache_ioctl(inode, filp, cmd, arg, cd);
}
static int cache_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE(inode)->data;
+ struct cache_detail *cd = PDE_DATA(inode);
return cache_open(inode, filp, cd);
}
static int cache_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE(inode)->data;
+ struct cache_detail *cd = PDE_DATA(inode);
return cache_release(inode, filp, cd);
}
@@ -1517,14 +1518,14 @@ static const struct file_operations cache_file_operations_procfs = {
static int content_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE(inode)->data;
+ struct cache_detail *cd = PDE_DATA(inode);
return content_open(inode, filp, cd);
}
static int content_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE(inode)->data;
+ struct cache_detail *cd = PDE_DATA(inode);
return content_release(inode, filp, cd);
}
@@ -1538,14 +1539,14 @@ static const struct file_operations content_file_operations_procfs = {
static int open_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE(inode)->data;
+ struct cache_detail *cd = PDE_DATA(inode);
return open_flush(inode, filp, cd);
}
static int release_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE(inode)->data;
+ struct cache_detail *cd = PDE_DATA(inode);
return release_flush(inode, filp, cd);
}
@@ -1553,7 +1554,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE(file_inode(filp))->data;
+ struct cache_detail *cd = PDE_DATA(file_inode(filp));
return read_flush(filp, buf, count, ppos, cd);
}
@@ -1562,7 +1563,7 @@ static ssize_t write_flush_procfs(struct file *filp,
const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE(file_inode(filp))->data;
+ struct cache_detail *cd = PDE_DATA(file_inode(filp));
return write_flush(filp, buf, count, ppos, cd);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d5f35f15af98..f0339ae9bf37 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -157,20 +157,15 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
}
static int
-rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name)
+rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name,
+ struct super_block *pipefs_sb)
{
- struct net *net = rpc_net_ns(clnt);
- struct super_block *pipefs_sb;
struct dentry *dentry;
clnt->cl_dentry = NULL;
if (dir_name == NULL)
return 0;
- pipefs_sb = rpc_get_sb_net(net);
- if (!pipefs_sb)
- return 0;
dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name);
- rpc_put_sb_net(net);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
clnt->cl_dentry = dentry;
@@ -182,6 +177,8 @@ static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event
if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
return 1;
+ if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0)
+ return 1;
return 0;
}
@@ -241,8 +238,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
continue;
if (rpc_clnt_skip_event(clnt, event))
continue;
- if (atomic_inc_not_zero(&clnt->cl_count) == 0)
- continue;
spin_unlock(&sn->rpc_client_lock);
return clnt;
}
@@ -259,7 +254,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) {
error = __rpc_pipefs_event(clnt, event, sb);
- rpc_release_client(clnt);
if (error)
break;
}
@@ -289,12 +283,46 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen);
}
+static int rpc_client_register(const struct rpc_create_args *args,
+ struct rpc_clnt *clnt)
+{
+ const struct rpc_program *program = args->program;
+ struct rpc_auth *auth;
+ struct net *net = rpc_net_ns(clnt);
+ struct super_block *pipefs_sb;
+ int err = 0;
+
+ pipefs_sb = rpc_get_sb_net(net);
+ if (pipefs_sb) {
+ err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb);
+ if (err)
+ goto out;
+ }
+
+ auth = rpcauth_create(args->authflavor, clnt);
+ if (IS_ERR(auth)) {
+ dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
+ args->authflavor);
+ err = PTR_ERR(auth);
+ goto err_auth;
+ }
+
+ rpc_register_client(clnt);
+out:
+ if (pipefs_sb)
+ rpc_put_sb_net(net);
+ return err;
+
+err_auth:
+ __rpc_clnt_remove_pipedir(clnt);
+ goto out;
+}
+
static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
{
const struct rpc_program *program = args->program;
const struct rpc_version *version;
struct rpc_clnt *clnt = NULL;
- struct rpc_auth *auth;
int err;
/* sanity check the name before trying to print it */
@@ -354,25 +382,14 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
atomic_set(&clnt->cl_count, 1);
- err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
- if (err < 0)
- goto out_no_path;
-
- auth = rpcauth_create(args->authflavor, clnt);
- if (IS_ERR(auth)) {
- printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
- args->authflavor);
- err = PTR_ERR(auth);
- goto out_no_auth;
- }
-
/* save the nodename */
rpc_clnt_set_nodename(clnt, utsname()->nodename);
- rpc_register_client(clnt);
+
+ err = rpc_client_register(args, clnt);
+ if (err)
+ goto out_no_path;
return clnt;
-out_no_auth:
- rpc_clnt_remove_pipedir(clnt);
out_no_path:
kfree(clnt->cl_principal);
out_no_principal:
@@ -411,6 +428,10 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
};
char servername[48];
+ if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
+ xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
+ if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
+ xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT;
/*
* If the caller chooses not to specify a hostname, whip
* up a string representation of the passed-in address.
@@ -633,8 +654,8 @@ rpc_free_client(struct rpc_clnt *clnt)
rcu_dereference(clnt->cl_xprt)->servername);
if (clnt->cl_parent != clnt)
rpc_release_client(clnt->cl_parent);
- rpc_unregister_client(clnt);
rpc_clnt_remove_pipedir(clnt);
+ rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
kfree(clnt->cl_principal);
clnt->cl_metrics = NULL;
@@ -679,6 +700,7 @@ rpc_release_client(struct rpc_clnt *clnt)
if (atomic_dec_and_test(&clnt->cl_count))
rpc_free_auth(clnt);
}
+EXPORT_SYMBOL_GPL(rpc_release_client);
/**
* rpc_bind_new_program - bind a new RPC program to an existing client
@@ -1301,6 +1323,8 @@ call_reserve(struct rpc_task *task)
xprt_reserve(task);
}
+static void call_retry_reserve(struct rpc_task *task);
+
/*
* 1b. Grok the result of xprt_reserve()
*/
@@ -1342,7 +1366,7 @@ call_reserveresult(struct rpc_task *task)
case -ENOMEM:
rpc_delay(task, HZ >> 2);
case -EAGAIN: /* woken up; retry */
- task->tk_action = call_reserve;
+ task->tk_action = call_retry_reserve;
return;
case -EIO: /* probably a shutdown */
break;
@@ -1355,6 +1379,19 @@ call_reserveresult(struct rpc_task *task)
}
/*
+ * 1c. Retry reserving an RPC call slot
+ */
+static void
+call_retry_reserve(struct rpc_task *task)
+{
+ dprint_status(task);
+
+ task->tk_status = 0;
+ task->tk_action = call_reserveresult;
+ xprt_retry_reserve(task);
+}
+
+/*
* 2. Bind and/or refresh the credentials
*/
static void
@@ -1639,22 +1676,26 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
- task->tk_status = 0;
- if (status >= 0 || status == -EAGAIN) {
- clnt->cl_stats->netreconn++;
- task->tk_action = call_transmit;
- return;
- }
-
trace_rpc_connect_status(task, status);
switch (status) {
/* if soft mounted, test if we've timed out */
case -ETIMEDOUT:
task->tk_action = call_timeout;
- break;
- default:
- rpc_exit(task, -EIO);
+ return;
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ENETUNREACH:
+ if (RPC_IS_SOFTCONN(task))
+ break;
+ /* retry with existing socket, after a delay */
+ case 0:
+ case -EAGAIN:
+ task->tk_status = 0;
+ clnt->cl_stats->netreconn++;
+ task->tk_action = call_transmit;
+ return;
}
+ rpc_exit(task, status);
}
/*
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index ce7bd449173d..74d948f5d5a1 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,16 @@ struct sunrpc_net {
struct rpc_clnt *rpcb_local_clnt4;
spinlock_t rpcb_clnt_lock;
unsigned int rpcb_users;
+
+ struct mutex gssp_lock;
+ wait_queue_head_t gssp_wq;
+ struct rpc_clnt *gssp_clnt;
+ int use_gss_proxy;
+ int pipe_version;
+ atomic_t pipe_users;
+ struct proc_dir_entry *use_gssp_proc;
+
+ unsigned int gssd_running;
};
extern int sunrpc_net_id;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a9129f8d7070..4679df5a6d50 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -216,11 +216,14 @@ rpc_destroy_inode(struct inode *inode)
static int
rpc_pipe_open(struct inode *inode, struct file *filp)
{
+ struct net *net = inode->i_sb->s_fs_info;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
int first_open;
int res = -ENXIO;
mutex_lock(&inode->i_mutex);
+ sn->gssd_running = 1;
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -664,7 +667,8 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
return ERR_PTR(-ENOMEM);
}
if (dentry->d_inode == NULL) {
- d_set_d_op(dentry, &rpc_dentry_operations);
+ if (!dentry->d_op)
+ d_set_d_op(dentry, &rpc_dentry_operations);
return dentry;
}
dput(dentry);
@@ -1069,6 +1073,8 @@ void rpc_pipefs_init_net(struct net *net)
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
mutex_init(&sn->pipefs_sb_lock);
+ sn->gssd_running = 1;
+ sn->pipe_version = -1;
}
/*
@@ -1121,6 +1127,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
net, NET_NAME(net));
+ mutex_lock(&sn->pipefs_sb_lock);
sn->pipefs_sb = sb;
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_MOUNT,
@@ -1128,6 +1135,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto err_depopulate;
sb->s_fs_info = get_net(net);
+ mutex_unlock(&sn->pipefs_sb_lock);
return 0;
err_depopulate:
@@ -1136,6 +1144,7 @@ err_depopulate:
sb);
sn->pipefs_sb = NULL;
__rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
+ mutex_unlock(&sn->pipefs_sb_lock);
return err;
}
@@ -1157,12 +1166,12 @@ static void rpc_kill_sb(struct super_block *sb)
goto out;
}
sn->pipefs_sb = NULL;
- mutex_unlock(&sn->pipefs_sb_lock);
dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
net, NET_NAME(net));
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
+ mutex_unlock(&sn->pipefs_sb_lock);
put_net(net);
out:
kill_litter_super(sb);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f8529fc8e542..93a7a4e94d80 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -254,7 +254,7 @@ static int rpc_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- freezable_schedule();
+ freezable_schedule_unsafe();
return 0;
}
@@ -324,11 +324,17 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
* Note: If the task is ASYNC, and is being made runnable after sitting on an
* rpc_wait_queue, this must be called with the queue spinlock held to protect
* the wait queue operation.
+ * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(),
+ * which is needed to ensure that __rpc_execute() doesn't loop (due to the
+ * lockless RPC_IS_QUEUED() test) before we've had a chance to test
+ * the RPC_TASK_RUNNING flag.
*/
static void rpc_make_runnable(struct rpc_task *task)
{
+ bool need_wakeup = !rpc_test_and_set_running(task);
+
rpc_clear_queued(task);
- if (rpc_test_and_set_running(task))
+ if (!need_wakeup)
return;
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
@@ -440,20 +446,6 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r
}
/*
- * Tests whether rpc queue is empty
- */
-int rpc_queue_empty(struct rpc_wait_queue *queue)
-{
- int res;
-
- spin_lock_bh(&queue->lock);
- res = queue->qlen;
- spin_unlock_bh(&queue->lock);
- return res == 0;
-}
-EXPORT_SYMBOL_GPL(rpc_queue_empty);
-
-/*
* Wake up a task on a specific queue
*/
void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
@@ -798,7 +790,6 @@ static void __rpc_execute(struct rpc_task *task)
task->tk_flags |= RPC_TASK_KILLED;
rpc_exit(task, -ERESTARTSYS);
}
- rpc_set_running(task);
dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
}
@@ -819,9 +810,11 @@ static void __rpc_execute(struct rpc_task *task)
*/
void rpc_execute(struct rpc_task *task)
{
+ bool is_async = RPC_IS_ASYNC(task);
+
rpc_set_active(task);
rpc_make_runnable(task);
- if (!RPC_IS_ASYNC(task))
+ if (!is_async)
__rpc_execute(task);
}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index bc2068ee795b..21b75cb08c03 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -64,7 +64,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
static int rpc_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, rpc_proc_show, PDE(inode)->data);
+ return single_open(file, rpc_proc_show, PDE_DATA(inode));
}
static const struct file_operations rpc_proc_fops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 89a588b4478b..b974571126fe 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -740,7 +740,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
__module_get(serv->sv_module);
task = kthread_create_on_node(serv->sv_function, rqstp,
- node, serv->sv_name);
+ node, "%s", serv->sv_name);
if (IS_ERR(task)) {
error = PTR_ERR(task);
module_put(serv->sv_module);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index c3f9e1ef7f53..06bdf5a1082c 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -810,11 +810,15 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
goto badcred;
argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
-
+ /*
+ * Note: we skip uid_valid()/gid_valid() checks here for
+ * backwards compatibility with clients that use -1 id's.
+ * Instead, -1 uid or gid is later mapped to the
+ * (export-specific) anonymous id by nfsd_setuser.
+ * Supplementary gid's will be left alone.
+ */
cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
- if (!uid_valid(cred->cr_uid) || !gid_valid(cred->cr_gid))
- goto badcred;
slen = svc_getnl(argv); /* gids length */
if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
@@ -823,8 +827,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
return SVC_CLOSE;
for (i = 0; i < slen; i++) {
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
- if (!gid_valid(kgid))
- goto badcred;
GROUP_AT(cred->cr_group_info, i) = kgid;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b7478d5e7ffd..095363eee764 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -948,6 +948,34 @@ void xprt_transmit(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
}
+static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ set_bit(XPRT_CONGESTED, &xprt->state);
+ rpc_sleep_on(&xprt->backlog, task, NULL);
+}
+
+static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
+{
+ if (rpc_wake_up_next(&xprt->backlog) == NULL)
+ clear_bit(XPRT_CONGESTED, &xprt->state);
+}
+
+static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ bool ret = false;
+
+ if (!test_bit(XPRT_CONGESTED, &xprt->state))
+ goto out;
+ spin_lock(&xprt->reserve_lock);
+ if (test_bit(XPRT_CONGESTED, &xprt->state)) {
+ rpc_sleep_on(&xprt->backlog, task, NULL);
+ ret = true;
+ }
+ spin_unlock(&xprt->reserve_lock);
+out:
+ return ret;
+}
+
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
@@ -992,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
task->tk_status = -ENOMEM;
break;
case -EAGAIN:
- rpc_sleep_on(&xprt->backlog, task, NULL);
+ xprt_add_backlog(xprt, task);
dprintk("RPC: waiting for request slot\n");
default:
task->tk_status = -EAGAIN;
@@ -1028,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
memset(req, 0, sizeof(*req)); /* mark unused */
list_add(&req->rq_list, &xprt->free);
}
- rpc_wake_up_next(&xprt->backlog);
+ xprt_wake_up_backlog(xprt);
spin_unlock(&xprt->reserve_lock);
}
@@ -1092,7 +1120,8 @@ EXPORT_SYMBOL_GPL(xprt_free);
* xprt_reserve - allocate an RPC request slot
* @task: RPC task requesting a slot allocation
*
- * If no more slots are available, place the task on the transport's
+ * If the transport is marked as being congested, or if no more
+ * slots are available, place the task on the transport's
* backlog queue.
*/
void xprt_reserve(struct rpc_task *task)
@@ -1107,6 +1136,32 @@ void xprt_reserve(struct rpc_task *task)
task->tk_status = -EAGAIN;
rcu_read_lock();
xprt = rcu_dereference(task->tk_client->cl_xprt);
+ if (!xprt_throttle_congested(xprt, task))
+ xprt->ops->alloc_slot(xprt, task);
+ rcu_read_unlock();
+}
+
+/**
+ * xprt_retry_reserve - allocate an RPC request slot
+ * @task: RPC task requesting a slot allocation
+ *
+ * If no more slots are available, place the task on the transport's
+ * backlog queue.
+ * Note that the only difference with xprt_reserve is that we now
+ * ignore the value of the XPRT_CONGESTED flag.
+ */
+void xprt_retry_reserve(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt;
+
+ task->tk_status = 0;
+ if (task->tk_rqstp != NULL)
+ return;
+
+ task->tk_timeout = 0;
+ task->tk_status = -EAGAIN;
+ rcu_read_lock();
+ xprt = rcu_dereference(task->tk_client->cl_xprt);
xprt->ops->alloc_slot(xprt, task);
rcu_read_unlock();
}
@@ -1245,6 +1300,8 @@ found:
-PTR_ERR(xprt));
goto out;
}
+ if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
+ xprt->idle_timeout = 0;
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
setup_timer(&xprt->timer, xprt_init_autodisconnect,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d02130828da..ffd50348a509 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2207,10 +2207,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
*/
xs_tcp_force_close(xprt);
break;
- case -ECONNREFUSED:
- case -ECONNRESET:
- case -ENETUNREACH:
- /* retry with existing socket, after a delay */
case 0:
case -EINPROGRESS:
case -EALREADY:
@@ -2221,6 +2217,10 @@ static void xs_tcp_setup_socket(struct work_struct *work)
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
*/
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ENETUNREACH:
+ /* retry with existing socket, after a delay */
goto out;
}
out_eagain:
@@ -2655,6 +2655,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
}
xprt_set_bound(xprt);
xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
+ ret = ERR_PTR(xs_local_setup_socket(transport));
+ if (ret)
+ goto out_err;
break;
default:
ret = ERR_PTR(-EAFNOSUPPORT);
@@ -2767,9 +2770,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
struct rpc_xprt *xprt;
struct sock_xprt *transport;
struct rpc_xprt *ret;
+ unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
+
+ if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
+ max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
- xprt_max_tcp_slot_table_entries);
+ max_slot_table_size);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 4f99600a5fed..c890848f9d56 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -31,3 +31,10 @@ config TIPC_PORTS
Setting this to a smaller value saves some memory,
setting it to higher allows for more ports.
+
+config TIPC_MEDIA_IB
+ bool "InfiniBand media type support"
+ depends on TIPC && INFINIBAND_IPOIB
+ help
+ Saying Y here will enable support for running TIPC on
+ IP-over-InfiniBand devices.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 6cd55d671d3a..4df8e02d9008 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,3 +9,5 @@ tipc-y += addr.o bcast.o bearer.o config.o \
name_distr.o subscr.o name_table.o net.o \
netlink.o node.o node_subscr.o port.o ref.o \
socket.o log.o eth_media.o
+
+tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 2655c9f4ecad..e5f3da507823 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -584,8 +584,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
{
int bp_index;
- /*
- * Prepare broadcast link message for reliable transmission,
+ /* Prepare broadcast link message for reliable transmission,
* if first time trying to send it;
* preparation is skipped for broadcast link protocol messages
* since they are sent in an unreliable manner and don't need it
@@ -611,30 +610,43 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
+ struct tipc_bearer *b = p;
+ struct sk_buff *tbuf;
if (!p)
- break; /* no more bearers to try */
+ break; /* No more bearers to try */
+
+ if (tipc_bearer_blocked(p)) {
+ if (!s || tipc_bearer_blocked(s))
+ continue; /* Can't use either bearer */
+ b = s;
+ }
- tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new);
+ tipc_nmap_diff(&bcbearer->remains, &b->nodes,
+ &bcbearer->remains_new);
if (bcbearer->remains_new.count == bcbearer->remains.count)
- continue; /* bearer pair doesn't add anything */
+ continue; /* Nothing added by bearer pair */
- if (!tipc_bearer_blocked(p))
- tipc_bearer_send(p, buf, &p->media->bcast_addr);
- else if (s && !tipc_bearer_blocked(s))
- /* unable to send on primary bearer */
- tipc_bearer_send(s, buf, &s->media->bcast_addr);
- else
- /* unable to send on either bearer */
- continue;
+ if (bp_index == 0) {
+ /* Use original buffer for first bearer */
+ tipc_bearer_send(b, buf, &b->bcast_addr);
+ } else {
+ /* Avoid concurrent buffer access */
+ tbuf = pskb_copy(buf, GFP_ATOMIC);
+ if (!tbuf)
+ break;
+ tipc_bearer_send(b, tbuf, &b->bcast_addr);
+ kfree_skb(tbuf); /* Bearer keeps a clone */
+ }
+ /* Swap bearers for next packet */
if (s) {
bcbearer->bpairs[bp_index].primary = s;
bcbearer->bpairs[bp_index].secondary = p;
}
if (bcbearer->remains_new.count == 0)
- break; /* all targets reached */
+ break; /* All targets reached */
bcbearer->remains = bcbearer->remains_new;
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index aa62f93a9127..cb29ef7ba2f0 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -39,7 +39,7 @@
#include "bearer.h"
#include "discover.h"
-#define MAX_ADDR_STR 32
+#define MAX_ADDR_STR 60
static struct tipc_media *media_list[MAX_MEDIA];
static u32 media_count;
@@ -89,9 +89,6 @@ int tipc_register_media(struct tipc_media *m_ptr)
if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME)
goto exit;
- if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) ||
- !m_ptr->bcast_addr.broadcast)
- goto exit;
if (m_ptr->priority > TIPC_MAX_LINK_PRI)
goto exit;
if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) ||
@@ -407,7 +404,7 @@ restart:
INIT_LIST_HEAD(&b_ptr->links);
spin_lock_init(&b_ptr->lock);
- res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain);
+ res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain);
if (res) {
bearer_disable(b_ptr);
pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 39f1192d04bf..09c869adcfcf 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -56,6 +56,7 @@
* Identifiers of supported TIPC media types
*/
#define TIPC_MEDIA_TYPE_ETH 1
+#define TIPC_MEDIA_TYPE_IB 2
/**
* struct tipc_media_addr - destination address used by TIPC bearers
@@ -77,7 +78,6 @@ struct tipc_bearer;
* @enable_bearer: routine which enables a bearer
* @disable_bearer: routine which disables a bearer
* @addr2str: routine which converts media address to string
- * @str2addr: routine which converts media address from string
* @addr2msg: routine which converts media address to protocol message area
* @msg2addr: routine which converts media address from protocol message area
* @bcast_addr: media address used in broadcasting
@@ -94,10 +94,9 @@ struct tipc_media {
int (*enable_bearer)(struct tipc_bearer *b_ptr);
void (*disable_bearer)(struct tipc_bearer *b_ptr);
int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
- int (*str2addr)(struct tipc_media_addr *a, char *str_buf);
int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
- int (*msg2addr)(struct tipc_media_addr *a, char *msg_area);
- struct tipc_media_addr bcast_addr;
+ int (*msg2addr)(const struct tipc_bearer *b_ptr,
+ struct tipc_media_addr *a, char *msg_area);
u32 priority;
u32 tolerance;
u32 window;
@@ -136,6 +135,7 @@ struct tipc_bearer {
char name[TIPC_MAX_BEARER_NAME];
spinlock_t lock;
struct tipc_media *media;
+ struct tipc_media_addr bcast_addr;
u32 priority;
u32 window;
u32 tolerance;
@@ -175,6 +175,14 @@ int tipc_disable_bearer(const char *name);
int tipc_eth_media_start(void);
void tipc_eth_media_stop(void);
+#ifdef CONFIG_TIPC_MEDIA_IB
+int tipc_ib_media_start(void);
+void tipc_ib_media_stop(void);
+#else
+static inline int tipc_ib_media_start(void) { return 0; }
+static inline void tipc_ib_media_stop(void) { return; }
+#endif
+
int tipc_media_set_priority(const char *name, u32 new_value);
int tipc_media_set_window(const char *name, u32 new_value);
void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fc05cecd7481..7ec2c1eb94f1 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -82,6 +82,7 @@ static void tipc_core_stop_net(void)
{
tipc_net_stop();
tipc_eth_media_stop();
+ tipc_ib_media_stop();
}
/**
@@ -93,8 +94,15 @@ int tipc_core_start_net(unsigned long addr)
tipc_net_start(addr);
res = tipc_eth_media_start();
- if (res)
- tipc_core_stop_net();
+ if (res < 0)
+ goto err;
+ res = tipc_ib_media_start();
+ if (res < 0)
+ goto err;
+ return res;
+
+err:
+ tipc_core_stop_net();
return res;
}
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 1074b9587e81..eedff58d0387 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -129,7 +129,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
int link_fully_up;
media_addr.broadcast = 1;
- b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg));
+ b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg));
kfree_skb(buf);
/* Ensure message from node is valid and communication is permitted */
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 2132c1ef2951..120a676a3360 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -77,12 +77,13 @@ static struct notifier_block notifier = {
* Media-dependent "value" field stores MAC address in first 6 bytes
* and zeroes out the remaining bytes.
*/
-static void eth_media_addr_set(struct tipc_media_addr *a, char *mac)
+static void eth_media_addr_set(const struct tipc_bearer *tb_ptr,
+ struct tipc_media_addr *a, char *mac)
{
memcpy(a->value, mac, ETH_ALEN);
memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN);
a->media_id = TIPC_MEDIA_TYPE_ETH;
- a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN);
+ a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN);
}
/**
@@ -110,6 +111,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
skb_reset_network_header(clone);
clone->dev = dev;
+ clone->protocol = htons(ETH_P_TIPC);
dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
dev->dev_addr, clone->len);
dev_queue_xmit(clone);
@@ -201,9 +203,13 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
/* Associate TIPC bearer with Ethernet bearer */
eb_ptr->bearer = tb_ptr;
tb_ptr->usr_handle = (void *)eb_ptr;
+ memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
+ memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN);
+ tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH;
+ tb_ptr->bcast_addr.broadcast = 1;
tb_ptr->mtu = dev->mtu;
tb_ptr->blocked = 0;
- eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr);
+ eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);
return 0;
}
@@ -302,25 +308,6 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
}
/**
- * eth_str2addr - convert string to Ethernet address
- */
-static int eth_str2addr(struct tipc_media_addr *a, char *str_buf)
-{
- char mac[ETH_ALEN];
- int r;
-
- r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x",
- (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2],
- (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]);
-
- if (r != ETH_ALEN)
- return 1;
-
- eth_media_addr_set(a, mac);
- return 0;
-}
-
-/**
* eth_str2addr - convert Ethernet address format to message header format
*/
static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
@@ -334,12 +321,13 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
/**
* eth_str2addr - convert message header address format to Ethernet format
*/
-static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area)
+static int eth_msg2addr(const struct tipc_bearer *tb_ptr,
+ struct tipc_media_addr *a, char *msg_area)
{
if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)
return 1;
- eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET);
+ eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET);
return 0;
}
@@ -351,11 +339,8 @@ static struct tipc_media eth_media_info = {
.enable_bearer = enable_bearer,
.disable_bearer = disable_bearer,
.addr2str = eth_addr2str,
- .str2addr = eth_str2addr,
.addr2msg = eth_addr2msg,
.msg2addr = eth_msg2addr,
- .bcast_addr = { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
- TIPC_MEDIA_TYPE_ETH, 1 },
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
.window = TIPC_DEF_LINK_WIN,
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
new file mode 100644
index 000000000000..2a2864c25e15
--- /dev/null
+++ b/net/tipc/ib_media.c
@@ -0,0 +1,387 @@
+/*
+ * net/tipc/ib_media.c: Infiniband bearer support for TIPC
+ *
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on eth_media.c, which carries the following copyright notice:
+ *
+ * Copyright (c) 2001-2007, Ericsson AB
+ * Copyright (c) 2005-2008, 2011, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/if_infiniband.h>
+#include "core.h"
+#include "bearer.h"
+
+#define MAX_IB_BEARERS MAX_BEARERS
+
+/**
+ * struct ib_bearer - Infiniband bearer data structure
+ * @bearer: ptr to associated "generic" bearer structure
+ * @dev: ptr to associated Infiniband network device
+ * @tipc_packet_type: used in binding TIPC to Infiniband driver
+ * @setup: work item used when enabling bearer
+ * @cleanup: work item used when disabling bearer
+ */
+struct ib_bearer {
+ struct tipc_bearer *bearer; /* NULL while the bearer is disabled; recv_msg() then drops input */
+ struct net_device *dev; /* NULL marks this table slot as free (see enable_bearer()) */
+ struct packet_type tipc_packet_type; /* added by setup_bearer(), removed by cleanup_bearer() */
+ struct work_struct setup; /* runs setup_bearer() */
+ struct work_struct cleanup; /* runs cleanup_bearer() */
+};
+
+static struct tipc_media ib_media_info;
+static struct ib_bearer ib_bearers[MAX_IB_BEARERS];
+static int ib_started;
+
+/**
+ * ib_media_addr_set - initialize Infiniband media address structure
+ *
+ * Media-dependent "value" field receives the INFINIBAND_ALEN-byte hardware
+ * address from @mac; any bytes of "value" beyond that are left untouched.
+ */
+static void ib_media_addr_set(const struct tipc_bearer *tb_ptr,
+ struct tipc_media_addr *a, char *mac)
+{
+ BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN); /* value[] must be able to hold a full IB address */
+ memcpy(a->value, mac, INFINIBAND_ALEN);
+ a->media_id = TIPC_MEDIA_TYPE_IB;
+ a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN); /* 1 iff mac equals the bearer's broadcast address */
+}
+
+/**
+ * send_msg - send a clone of a TIPC message out over an InfiniBand interface
+ */
+static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
+ struct tipc_media_addr *dest)
+{
+ struct sk_buff *clone;
+ struct net_device *dev;
+ int delta;
+
+ clone = skb_clone(buf, GFP_ATOMIC); /* the caller's buf itself is not queued for transmit */
+ if (!clone)
+ return 0; /* clone failed: nothing is sent, 0 is still returned */
+
+ dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev; /* usr_handle is set by enable_bearer() */
+ delta = dev->hard_header_len - skb_headroom(buf); /* > 0 when headroom is too small for the link header */
+
+ if ((delta > 0) &&
+ pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+ kfree_skb(clone);
+ return 0; /* could not grow headroom: clone dropped, 0 is still returned */
+ }
+
+ skb_reset_network_header(clone);
+ clone->dev = dev;
+ clone->protocol = htons(ETH_P_TIPC);
+ dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
+ dev->dev_addr, clone->len); /* return value is not checked */
+ dev_queue_xmit(clone); /* return value is not checked */
+ return 0; /* every path returns 0 */
+}
+
+/**
+ * recv_msg - handle incoming TIPC message from an InfiniBand interface
+ *
+ * Accept only packets explicitly sent to this node, or broadcast packets;
+ * ignores packets sent using InfiniBand multicast, and traffic sent to other
+ * nodes (which can happen if interface is running in promiscuous mode).
+ */
+static int recv_msg(struct sk_buff *buf, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv; /* set in enable_bearer() */
+
+ if (!net_eq(dev_net(dev), &init_net)) { /* buffers from other network namespaces are freed */
+ kfree_skb(buf);
+ return 0;
+ }
+
+ if (likely(ib_ptr->bearer)) { /* bearer is NULL after disable_bearer() */
+ if (likely(buf->pkt_type <= PACKET_BROADCAST)) { /* the filter described in the header comment */
+ buf->next = NULL;
+ tipc_recv_msg(buf, ib_ptr->bearer); /* buf is handed over; not freed here */
+ return 0;
+ }
+ }
+ kfree_skb(buf); /* bearer disabled or packet type rejected */
+ return 0;
+}
+
+/**
+ * setup_bearer - setup association between InfiniBand bearer and interface
+ */
+static void setup_bearer(struct work_struct *work)
+{
+ struct ib_bearer *ib_ptr =
+ container_of(work, struct ib_bearer, setup); /* work item is scheduled by enable_bearer() */
+
+ dev_add_pack(&ib_ptr->tipc_packet_type); /* packet_type fields (type, dev, func) were filled in by enable_bearer() */
+}
+
+/**
+ * enable_bearer - attach TIPC bearer to an InfiniBand interface
+ */
+static int enable_bearer(struct tipc_bearer *tb_ptr)
+{
+ struct net_device *dev = NULL;
+ struct net_device *pdev = NULL;
+ struct ib_bearer *ib_ptr = &ib_bearers[0];
+ struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
+ char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; /* NOTE(review): strchr() result is not NULL-checked; assumes name contains ':' - confirm caller validates */
+ int pending_dev = 0;
+
+ /* Find unused InfiniBand bearer structure (a slot is unused when ->dev is NULL) */
+ while (ib_ptr->dev) {
+ if (!ib_ptr->bearer)
+ pending_dev++; /* disabled slot whose ->dev has not been cleared by cleanup_bearer() yet */
+ if (++ib_ptr == stop)
+ return pending_dev ? -EAGAIN : -EDQUOT; /* table full: -EAGAIN if some slot is awaiting cleanup */
+ }
+
+ /* Find device with specified name */
+ read_lock(&dev_base_lock);
+ for_each_netdev(&init_net, pdev) {
+ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
+ dev = pdev;
+ dev_hold(dev); /* released by dev_put() in cleanup_bearer() */
+ break;
+ }
+ }
+ read_unlock(&dev_base_lock);
+ if (!dev)
+ return -ENODEV;
+
+ /* Create InfiniBand bearer for device */
+ ib_ptr->dev = dev;
+ ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
+ ib_ptr->tipc_packet_type.dev = dev;
+ ib_ptr->tipc_packet_type.func = recv_msg;
+ ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; /* read back in recv_msg() */
+ INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list));
+ INIT_WORK(&ib_ptr->setup, setup_bearer);
+ schedule_work(&ib_ptr->setup); /* dev_add_pack() is deferred to setup_bearer() */
+
+ /* Associate TIPC bearer with InfiniBand bearer */
+ ib_ptr->bearer = tb_ptr;
+ tb_ptr->usr_handle = (void *)ib_ptr;
+ memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
+ memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN); /* broadcast address is taken from the device */
+ tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB;
+ tb_ptr->bcast_addr.broadcast = 1;
+ tb_ptr->mtu = dev->mtu;
+ tb_ptr->blocked = 0;
+ ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); /* called after bcast_addr is set: it compares against bcast_addr.value */
+ return 0;
+}
+
+/**
+ * cleanup_bearer - break association between InfiniBand bearer and interface
+ *
+ * This routine must be invoked from a work queue because it can sleep.
+ */
+static void cleanup_bearer(struct work_struct *work)
+{
+ struct ib_bearer *ib_ptr =
+ container_of(work, struct ib_bearer, cleanup); /* work item is scheduled by disable_bearer() */
+
+ dev_remove_pack(&ib_ptr->tipc_packet_type); /* undoes dev_add_pack() from setup_bearer() */
+ dev_put(ib_ptr->dev); /* pairs with dev_hold() in enable_bearer() */
+ ib_ptr->dev = NULL; /* slot can now be reused by enable_bearer() */
+}
+
+/**
+ * disable_bearer - detach TIPC bearer from an InfiniBand interface
+ *
+ * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away,
+ * then get worker thread to complete bearer cleanup. (Can't do cleanup
+ * here because cleanup code needs to sleep and caller holds spinlocks.)
+ */
+static void disable_bearer(struct tipc_bearer *tb_ptr)
+{
+ struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle; /* set by enable_bearer() */
+
+ ib_ptr->bearer = NULL; /* recv_msg() frees buffers while this is NULL */
+ INIT_WORK(&ib_ptr->cleanup, cleanup_bearer);
+ schedule_work(&ib_ptr->cleanup); /* ->dev stays set until cleanup_bearer() has run */
+}
+
+/**
+ * recv_notification - handle device updates from OS
+ *
+ * Change the state of the InfiniBand bearer (if any) associated with the
+ * specified device.
+ */
+static int recv_notification(struct notifier_block *nb, unsigned long evt,
+ void *dv)
+{
+ struct net_device *dev = (struct net_device *)dv;
+ struct ib_bearer *ib_ptr = &ib_bearers[0];
+ struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
+
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE; /* devices outside the initial namespace are ignored */
+
+ while ((ib_ptr->dev != dev)) { /* linear search of the bearer table for this device */
+ if (++ib_ptr == stop)
+ return NOTIFY_DONE; /* couldn't find device */
+ }
+ if (!ib_ptr->bearer)
+ return NOTIFY_DONE; /* bearer had been disabled */
+
+ ib_ptr->bearer->mtu = dev->mtu; /* refreshed on every event type, not only NETDEV_CHANGEMTU */
+
+ switch (evt) {
+ case NETDEV_CHANGE:
+ if (netif_carrier_ok(dev))
+ tipc_continue(ib_ptr->bearer);
+ else
+ tipc_block_bearer(ib_ptr->bearer->name);
+ break;
+ case NETDEV_UP:
+ tipc_continue(ib_ptr->bearer);
+ break;
+ case NETDEV_DOWN:
+ tipc_block_bearer(ib_ptr->bearer->name);
+ break;
+ case NETDEV_CHANGEMTU:
+ case NETDEV_CHANGEADDR:
+ tipc_block_bearer(ib_ptr->bearer->name); /* block then continue: presumably restarts the bearer's links - confirm */
+ tipc_continue(ib_ptr->bearer);
+ break;
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGENAME:
+ tipc_disable_bearer(ib_ptr->bearer->name);
+ break;
+ }
+ return NOTIFY_OK; /* also returned for event types the switch does not handle */
+}
+
+static struct notifier_block notifier = { /* registered in tipc_ib_media_start(), unregistered in tipc_ib_media_stop() */
+ .notifier_call = recv_notification,
+ .priority = 0,
+};
+
+/**
+ * ib_addr2str - convert InfiniBand address to string; returns 1 if str_size < 60, else 0
+ */
+static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
+{
+ if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */
+ return 1; /* str_buf is left unmodified */
+
+ sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:"
+ "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
+ a->value[0], a->value[1], a->value[2], a->value[3],
+ a->value[4], a->value[5], a->value[6], a->value[7],
+ a->value[8], a->value[9], a->value[10], a->value[11],
+ a->value[12], a->value[13], a->value[14], a->value[15],
+ a->value[16], a->value[17], a->value[18], a->value[19]); /* 20 bytes printed, colon-separated */
+
+ return 0;
+}
+
+/**
+ * ib_addr2msg - convert InfiniBand address format to message header format
+ */
+static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area)
+{
+ memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
+ msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB;
+ memcpy(msg_area, a->value, INFINIBAND_ALEN); /* NOTE(review): copies to offset 0; if TIPC_MEDIA_TYPE_OFFSET < INFINIBAND_ALEN this overwrites the type byte set above - confirm */
+ return 0; /* always 0 */
+}
+
+/**
+ * ib_msg2addr - convert message header address format to InfiniBand format
+ */
+static int ib_msg2addr(const struct tipc_bearer *tb_ptr,
+ struct tipc_media_addr *a, char *msg_area)
+{
+ ib_media_addr_set(tb_ptr, a, msg_area); /* address is read from offset 0; no media type byte is checked here */
+ return 0; /* always 0 */
+}
+
+/*
+ * InfiniBand media registration info (passed to tipc_register_media())
+ */
+static struct tipc_media ib_media_info = {
+ .send_msg = send_msg,
+ .enable_bearer = enable_bearer,
+ .disable_bearer = disable_bearer,
+ .addr2str = ib_addr2str,
+ .addr2msg = ib_addr2msg,
+ .msg2addr = ib_msg2addr,
+ .priority = TIPC_DEF_LINK_PRI,
+ .tolerance = TIPC_DEF_LINK_TOL,
+ .window = TIPC_DEF_LINK_WIN,
+ .type_id = TIPC_MEDIA_TYPE_IB,
+ .name = "ib"
+};
+
+/**
+ * tipc_ib_media_start - activate InfiniBand bearer support
+ *
+ * Register InfiniBand media type with TIPC bearer code. Also register
+ * with OS for notifications about device state changes.
+ */
+int tipc_ib_media_start(void)
+{
+ int res;
+
+ if (ib_started)
+ return -EINVAL; /* already started */
+
+ res = tipc_register_media(&ib_media_info);
+ if (res)
+ return res;
+
+ res = register_netdevice_notifier(&notifier);
+ if (!res)
+ ib_started = 1;
+ return res; /* NOTE(review): if the notifier registration failed, the media registration above is not rolled back here */
+}
+
+/**
+ * tipc_ib_media_stop - deactivate InfiniBand bearer support
+ */
+void tipc_ib_media_stop(void)
+{
+ if (!ib_started)
+ return; /* nothing to undo */
+
+ flush_scheduled_work(); /* setup/cleanup work items are queued with schedule_work() */
+ unregister_netdevice_notifier(&notifier);
+ ib_started = 0; /* the media type itself is not unregistered here */
+}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index daa6080a2a0c..a80feee5197a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2306,8 +2306,11 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
u32 msg_typ = msg_type(tunnel_msg);
u32 msg_count = msg_msgcnt(tunnel_msg);
+ u32 bearer_id = msg_bearer_id(tunnel_msg);
- dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)];
+ if (bearer_id >= MAX_BEARERS)
+ goto exit;
+ dest_link = (*l_ptr)->owner->links[bearer_id];
if (!dest_link)
goto exit;
if (dest_link == *l_ptr) {
@@ -2521,14 +2524,16 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm);
u32 msg_sz = msg_size(imsg);
u32 fragm_sz = msg_data_sz(fragm);
- u32 exp_fragm_cnt = msg_sz/fragm_sz + !!(msg_sz % fragm_sz);
+ u32 exp_fragm_cnt;
u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE;
+
if (msg_type(imsg) == TIPC_MCAST_MSG)
max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
- if (msg_size(imsg) > max) {
+ if (fragm_sz == 0 || msg_size(imsg) > max) {
kfree_skb(fbuf);
return 0;
}
+ exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
pbuf = tipc_buf_acquire(msg_size(imsg));
if (pbuf != NULL) {
pbuf->next = *pending;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 6675914dc592..8bcd4985d0fb 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -44,7 +44,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
struct nlmsghdr *rep_nlh;
struct nlmsghdr *req_nlh = info->nlhdr;
struct tipc_genlmsghdr *req_userhdr = info->userhdr;
- int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN);
+ int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
u16 cmd;
if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
@@ -53,8 +53,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
cmd = req_userhdr->cmd;
rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd,
- NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
- NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
+ nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
+ nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
hdr_space);
if (rep_buf) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d82e7d..c4ce243824bb 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,7 @@
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
+#include <linux/freezer.h>
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
@@ -1340,7 +1341,6 @@ static void unix_destruct_scm(struct sk_buff *skb)
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
scm.pid = UNIXCB(skb).pid;
- scm.cred = UNIXCB(skb).cred;
if (UNIXCB(skb).fp)
unix_detach_fds(&scm, skb);
@@ -1391,8 +1391,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
int err = 0;
UNIXCB(skb).pid = get_pid(scm->pid);
- if (scm->cred)
- UNIXCB(skb).cred = get_cred(scm->cred);
+ UNIXCB(skb).uid = scm->creds.uid;
+ UNIXCB(skb).gid = scm->creds.gid;
UNIXCB(skb).fp = NULL;
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
@@ -1409,13 +1409,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
const struct sock *other)
{
- if (UNIXCB(skb).cred)
+ if (UNIXCB(skb).pid)
return;
if (test_bit(SOCK_PASSCRED, &sock->flags) ||
!other->sk_socket ||
test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
- UNIXCB(skb).cred = get_current_cred();
+ current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
}
@@ -1819,7 +1819,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
siocb->scm = &tmp_scm;
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
- scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(siocb->scm, skb);
if (!(flags & MSG_PEEK)) {
@@ -1859,10 +1859,10 @@ out:
}
/*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
*/
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
{
DEFINE_WAIT(wait);
@@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (skb_peek_tail(&sk->sk_receive_queue) != last ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1880,7 +1880,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
unix_state_unlock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
@@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
return timeo;
}
-
-
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size,
int flags)
@@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
unix_state_lock(sk);
- skb = skb_peek(&sk->sk_receive_queue);
+ last = skb = skb_peek(&sk->sk_receive_queue);
again:
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
@@ -1966,7 +1962,7 @@ again:
break;
mutex_unlock(&u->readlock);
- timeo = unix_stream_data_wait(sk, timeo);
+ timeo = unix_stream_data_wait(sk, timeo, last);
if (signal_pending(current)
|| mutex_lock_interruptible(&u->readlock)) {
@@ -1980,10 +1976,13 @@ again:
break;
}
- if (skip >= skb->len) {
+ skip = sk_peek_offset(sk, flags);
+ while (skip >= skb->len) {
skip -= skb->len;
+ last = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- goto again;
+ if (!skb)
+ goto again;
}
unix_state_unlock(sk);
@@ -1991,11 +1990,12 @@ again:
if (check_creds) {
/* Never glue messages from different writers */
if ((UNIXCB(skb).pid != siocb->scm->pid) ||
- (UNIXCB(skb).cred != siocb->scm->cred))
+ !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
+ !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
break;
} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
- scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
check_creds = 1;
}
@@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d0f6545b0010..9bc73f87f64a 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
* have been added to the queues after
* starting the garbage collection
*/
- if (u->gc_candidate) {
+ if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
hit = true;
func(u);
}
@@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)
* of the list, so that it's checked even if it was already
* passed over
*/
- if (u->gc_maybe_cycle)
+ if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
list_move_tail(&u->link, &gc_candidates);
}
@@ -315,8 +315,8 @@ void unix_gc(void)
BUG_ON(total_refs < inflight_refs);
if (total_refs == inflight_refs) {
list_move_tail(&u->link, &gc_candidates);
- u->gc_candidate = 1;
- u->gc_maybe_cycle = 1;
+ __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+ __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
}
}
@@ -344,7 +344,7 @@ void unix_gc(void)
if (atomic_long_read(&u->inflight) > 0) {
list_move_tail(&u->link, &not_cycle_list);
- u->gc_maybe_cycle = 0;
+ __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL);
}
}
@@ -356,7 +356,7 @@ void unix_gc(void)
*/
while (!list_empty(&not_cycle_list)) {
u = list_entry(not_cycle_list.next, struct unix_sock, link);
- u->gc_candidate = 0;
+ __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
list_move_tail(&u->link, &gc_inflight_list);
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7f93e2a42d7a..3f77f42a3b58 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -165,7 +165,7 @@ static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
static DEFINE_SPINLOCK(vsock_table_lock);
-static __init void vsock_init_tables(void)
+static void vsock_init_tables(void)
{
int i;
@@ -1932,7 +1932,6 @@ static const struct file_operations vsock_device_ops = {
static struct miscdevice vsock_device = {
.name = "vsock",
- .minor = MISC_DYNAMIC_MINOR,
.fops = &vsock_device_ops,
};
@@ -1942,6 +1941,7 @@ static int __vsock_core_init(void)
vsock_init_tables();
+ vsock_device.minor = MISC_DYNAMIC_MINOR;
err = misc_register(&vsock_device);
if (err) {
pr_err("Failed to register misc device\n");
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 5e04d3d96285..daff75200e25 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -123,6 +123,14 @@ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
return err > 0 ? -err : err;
}
+static u32 vmci_transport_peer_rid(u32 peer_cid) /* selects the control-packet resource ID for a peer context ID */
+{
+ if (VMADDR_CID_HYPERVISOR == peer_cid)
+ return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID; /* the hypervisor peer gets its own resource ID */
+
+ return VMCI_TRANSPORT_PACKET_RID; /* every other peer */
+}
+
static inline void
vmci_transport_packet_init(struct vmci_transport_packet *pkt,
struct sockaddr_vm *src,
@@ -140,7 +148,7 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
VMCI_TRANSPORT_PACKET_RID);
pkt->dg.dst = vmci_make_handle(dst->svm_cid,
- VMCI_TRANSPORT_PACKET_RID);
+ vmci_transport_peer_rid(dst->svm_cid));
pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
pkt->type = type;
@@ -508,6 +516,9 @@ static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
{
+ if (VMADDR_CID_HYPERVISOR == peer_cid)
+ return true;
+
if (vsock->cached_peer != peer_cid) {
vsock->cached_peer = peer_cid;
if (!vmci_transport_is_trusted(vsock, peer_cid) &&
@@ -628,7 +639,6 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
static bool vmci_transport_stream_allow(u32 cid, u32 port)
{
static const u32 non_socket_contexts[] = {
- VMADDR_CID_HYPERVISOR,
VMADDR_CID_RESERVED,
};
int i;
@@ -667,7 +677,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
*/
if (!vmci_transport_stream_allow(dg->src.context, -1)
- || VMCI_TRANSPORT_PACKET_RID != dg->src.resource)
+ || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
return VMCI_ERROR_NO_ACCESS;
if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 1bf991803ec0..fd88ea8924e4 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -28,6 +28,9 @@
/* The resource ID on which control packets are sent. */
#define VMCI_TRANSPORT_PACKET_RID 1
+/* The resource ID on which control packets are sent to the hypervisor. */
+#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID 15
+
#define VSOCK_PROTO_INVALID 0
#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0)
#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY)
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index a4a14e8f55cc..324e8d851dc4 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -46,65 +46,3 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
return err;
}
-
-void cfg80211_ch_switch_notify(struct net_device *dev,
- struct cfg80211_chan_def *chandef)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_ch_switch_notify(dev, chandef);
-
- wdev_lock(wdev);
-
- if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
- wdev->iftype != NL80211_IFTYPE_P2P_GO))
- goto out;
-
- wdev->channel = chandef->chan;
- nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL);
-out:
- wdev_unlock(wdev);
- return;
-}
-EXPORT_SYMBOL(cfg80211_ch_switch_notify);
-
-bool cfg80211_rx_spurious_frame(struct net_device *dev,
- const u8 *addr, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- bool ret;
-
- trace_cfg80211_rx_spurious_frame(dev, addr);
-
- if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
- wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
- trace_cfg80211_return_bool(false);
- return false;
- }
- ret = nl80211_unexpected_frame(dev, addr, gfp);
- trace_cfg80211_return_bool(ret);
- return ret;
-}
-EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
-
-bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
- const u8 *addr, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- bool ret;
-
- trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);
-
- if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
- wdev->iftype != NL80211_IFTYPE_P2P_GO &&
- wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
- trace_cfg80211_return_bool(false);
- return false;
- }
- ret = nl80211_unexpected_4addr_frame(dev, addr, gfp);
- trace_cfg80211_return_bool(ret);
- return ret;
-}
-EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 6ddf74f0ae1e..73405e00c800 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -638,17 +638,21 @@ int wiphy_register(struct wiphy *wiphy)
* cfg80211_mutex lock
*/
res = rfkill_register(rdev->rfkill);
- if (res)
- goto out_rm_dev;
+ if (res) {
+ device_del(&rdev->wiphy.dev);
+
+ mutex_lock(&cfg80211_mutex);
+ debugfs_remove_recursive(rdev->wiphy.debugfsdir);
+ list_del_rcu(&rdev->list);
+ wiphy_regulatory_deregister(wiphy);
+ mutex_unlock(&cfg80211_mutex);
+ return res;
+ }
rtnl_lock();
rdev->wiphy.registered = true;
rtnl_unlock();
return 0;
-
-out_rm_dev:
- device_del(&rdev->wiphy.dev);
- return res;
}
EXPORT_SYMBOL(wiphy_register);
@@ -842,6 +846,45 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
rdev->num_running_monitor_ifaces += num;
}
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{ /* dispatches on wdev->iftype to the matching leave/disconnect/stop routine */
+ struct net_device *dev = wdev->netdev;
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_ADHOC:
+ cfg80211_leave_ibss(rdev, dev, true);
+ break;
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_STATION:
+ mutex_lock(&rdev->sched_scan_mtx);
+ __cfg80211_stop_sched_scan(rdev, false);
+ mutex_unlock(&rdev->sched_scan_mtx); /* sched_scan_mtx is released before wdev_lock() is taken */
+
+ wdev_lock(wdev);
+#ifdef CONFIG_CFG80211_WEXT
+ kfree(wdev->wext.ie); /* wireless-extensions IE state is discarded before disconnecting */
+ wdev->wext.ie = NULL;
+ wdev->wext.ie_len = 0;
+ wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
+#endif
+ __cfg80211_disconnect(rdev, dev,
+ WLAN_REASON_DEAUTH_LEAVING, true);
+ wdev_unlock(wdev);
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ cfg80211_leave_mesh(rdev, dev);
+ break;
+ case NL80211_IFTYPE_AP:
+ cfg80211_stop_ap(rdev, dev);
+ break;
+ default:
+ break; /* other interface types need no teardown here */
+ }
+
+ wdev->beacon_interval = 0; /* reset for every interface type */
+}
+
static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
unsigned long state,
void *ndev)
@@ -910,38 +953,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
dev->priv_flags |= IFF_DONT_BRIDGE;
break;
case NETDEV_GOING_DOWN:
- switch (wdev->iftype) {
- case NL80211_IFTYPE_ADHOC:
- cfg80211_leave_ibss(rdev, dev, true);
- break;
- case NL80211_IFTYPE_P2P_CLIENT:
- case NL80211_IFTYPE_STATION:
- mutex_lock(&rdev->sched_scan_mtx);
- __cfg80211_stop_sched_scan(rdev, false);
- mutex_unlock(&rdev->sched_scan_mtx);
-
- wdev_lock(wdev);
-#ifdef CONFIG_CFG80211_WEXT
- kfree(wdev->wext.ie);
- wdev->wext.ie = NULL;
- wdev->wext.ie_len = 0;
- wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
-#endif
- __cfg80211_disconnect(rdev, dev,
- WLAN_REASON_DEAUTH_LEAVING, true);
- cfg80211_mlme_down(rdev, dev);
- wdev_unlock(wdev);
- break;
- case NL80211_IFTYPE_MESH_POINT:
- cfg80211_leave_mesh(rdev, dev);
- break;
- case NL80211_IFTYPE_AP:
- cfg80211_stop_ap(rdev, dev);
- break;
- default:
- break;
- }
- wdev->beacon_interval = 0;
+ cfg80211_leave(rdev, wdev);
break;
case NETDEV_DOWN:
cfg80211_update_iface_num(rdev, wdev->iftype, -1);
@@ -1117,8 +1129,10 @@ static int __init cfg80211_init(void)
goto out_fail_reg;
cfg80211_wq = create_singlethread_workqueue("cfg80211");
- if (!cfg80211_wq)
+ if (!cfg80211_wq) {
+ err = -ENOMEM;
goto out_fail_wq;
+ }
return 0;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5845c2b37aa8..fd35dae547c4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -88,6 +88,9 @@ struct cfg80211_registered_device {
struct delayed_work dfs_update_channels_wk;
+ /* netlink port which started critical protocol (0 means not started) */
+ u32 crit_proto_nlportid;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -330,20 +333,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct ieee80211_channel *chan,
- const u8 *bssid, const u8 *prev_bssid,
+ const u8 *bssid,
const u8 *ssid, int ssid_len,
- const u8 *ie, int ie_len, bool use_mfp,
- struct cfg80211_crypto_settings *crypt,
- u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
- struct ieee80211_ht_cap *ht_capa_mask);
+ struct cfg80211_assoc_request *req);
int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
- struct net_device *dev, struct ieee80211_channel *chan,
- const u8 *bssid, const u8 *prev_bssid,
+ struct net_device *dev,
+ struct ieee80211_channel *chan,
+ const u8 *bssid,
const u8 *ssid, int ssid_len,
- const u8 *ie, int ie_len, bool use_mfp,
- struct cfg80211_crypto_settings *crypt,
- u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
- struct ieee80211_ht_cap *ht_capa_mask);
+ struct cfg80211_assoc_request *req);
int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
struct net_device *dev, const u8 *bssid,
const u8 *ie, int ie_len, u16 reason,
@@ -375,6 +373,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
bool no_cck, bool dont_wait_for_ack, u64 *cookie);
void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
const struct ieee80211_ht_cap *ht_capa_mask);
+void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
+ const struct ieee80211_vht_cap *vht_capa_mask);
/* SME */
int __cfg80211_connect(struct cfg80211_registered_device *rdev,
@@ -503,6 +503,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype, int num);
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev);
+
void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index 1526c211db66..dc0e59e53dbf 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -430,24 +430,23 @@ static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv)
return CCMP_TK_LEN;
}
-static char *lib80211_ccmp_print_stats(char *p, void *priv)
+static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv)
{
struct lib80211_ccmp_data *ccmp = priv;
- p += sprintf(p, "key[%d] alg=CCMP key_set=%d "
- "tx_pn=%02x%02x%02x%02x%02x%02x "
- "rx_pn=%02x%02x%02x%02x%02x%02x "
- "format_errors=%d replays=%d decrypt_errors=%d\n",
- ccmp->key_idx, ccmp->key_set,
- ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
- ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
- ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
- ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
- ccmp->dot11RSNAStatsCCMPFormatErrors,
- ccmp->dot11RSNAStatsCCMPReplays,
- ccmp->dot11RSNAStatsCCMPDecryptErrors);
-
- return p;
+ seq_printf(m,
+ "key[%d] alg=CCMP key_set=%d "
+ "tx_pn=%02x%02x%02x%02x%02x%02x "
+ "rx_pn=%02x%02x%02x%02x%02x%02x "
+ "format_errors=%d replays=%d decrypt_errors=%d\n",
+ ccmp->key_idx, ccmp->key_set,
+ ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
+ ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
+ ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
+ ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
+ ccmp->dot11RSNAStatsCCMPFormatErrors,
+ ccmp->dot11RSNAStatsCCMPReplays,
+ ccmp->dot11RSNAStatsCCMPDecryptErrors);
}
static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index d475cfc8568f..8c90ba79e56e 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -703,30 +703,30 @@ static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
return TKIP_KEY_LEN;
}
-static char *lib80211_tkip_print_stats(char *p, void *priv)
+static void lib80211_tkip_print_stats(struct seq_file *m, void *priv)
{
struct lib80211_tkip_data *tkip = priv;
- p += sprintf(p, "key[%d] alg=TKIP key_set=%d "
- "tx_pn=%02x%02x%02x%02x%02x%02x "
- "rx_pn=%02x%02x%02x%02x%02x%02x "
- "replays=%d icv_errors=%d local_mic_failures=%d\n",
- tkip->key_idx, tkip->key_set,
- (tkip->tx_iv32 >> 24) & 0xff,
- (tkip->tx_iv32 >> 16) & 0xff,
- (tkip->tx_iv32 >> 8) & 0xff,
- tkip->tx_iv32 & 0xff,
- (tkip->tx_iv16 >> 8) & 0xff,
- tkip->tx_iv16 & 0xff,
- (tkip->rx_iv32 >> 24) & 0xff,
- (tkip->rx_iv32 >> 16) & 0xff,
- (tkip->rx_iv32 >> 8) & 0xff,
- tkip->rx_iv32 & 0xff,
- (tkip->rx_iv16 >> 8) & 0xff,
- tkip->rx_iv16 & 0xff,
- tkip->dot11RSNAStatsTKIPReplays,
- tkip->dot11RSNAStatsTKIPICVErrors,
- tkip->dot11RSNAStatsTKIPLocalMICFailures);
- return p;
+ seq_printf(m,
+ "key[%d] alg=TKIP key_set=%d "
+ "tx_pn=%02x%02x%02x%02x%02x%02x "
+ "rx_pn=%02x%02x%02x%02x%02x%02x "
+ "replays=%d icv_errors=%d local_mic_failures=%d\n",
+ tkip->key_idx, tkip->key_set,
+ (tkip->tx_iv32 >> 24) & 0xff,
+ (tkip->tx_iv32 >> 16) & 0xff,
+ (tkip->tx_iv32 >> 8) & 0xff,
+ tkip->tx_iv32 & 0xff,
+ (tkip->tx_iv16 >> 8) & 0xff,
+ tkip->tx_iv16 & 0xff,
+ (tkip->rx_iv32 >> 24) & 0xff,
+ (tkip->rx_iv32 >> 16) & 0xff,
+ (tkip->rx_iv32 >> 8) & 0xff,
+ tkip->rx_iv32 & 0xff,
+ (tkip->rx_iv16 >> 8) & 0xff,
+ tkip->rx_iv16 & 0xff,
+ tkip->dot11RSNAStatsTKIPReplays,
+ tkip->dot11RSNAStatsTKIPICVErrors,
+ tkip->dot11RSNAStatsTKIPLocalMICFailures);
}
static struct lib80211_crypto_ops lib80211_crypt_tkip = {
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index c1304018fc1c..1c292e4ea7b6 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -253,11 +253,10 @@ static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv)
return wep->key_len;
}
-static char *lib80211_wep_print_stats(char *p, void *priv)
+static void lib80211_wep_print_stats(struct seq_file *m, void *priv)
{
struct lib80211_wep_data *wep = priv;
- p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
- return p;
+ seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
}
static struct lib80211_crypto_ops lib80211_crypt_wep = {
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 55957a284f6c..0bb93f3061a4 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = {
.ie = NULL,
.ie_len = 0,
.is_secure = false,
+ .user_mpm = false,
.beacon_interval = MESH_DEFAULT_BEACON_INTERVAL,
.dtim_period = MESH_DEFAULT_DTIM_PERIOD,
};
@@ -233,20 +234,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
return 0;
}
-void cfg80211_notify_new_peer_candidate(struct net_device *dev,
- const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
-
- trace_cfg80211_notify_new_peer_candidate(dev, macaddr);
- if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT))
- return;
-
- nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev,
- macaddr, ie, ie_len, gfp);
-}
-EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
-
static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev)
{
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index caddca35d686..0c7b7dd855f6 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -187,30 +187,6 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
}
EXPORT_SYMBOL(cfg80211_send_disassoc);
-void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
- size_t len)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_send_unprot_deauth(dev);
- nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
-
-void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
- size_t len)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_send_unprot_disassoc(dev);
- nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
-
void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -367,27 +343,38 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
p1[i] &= p2[i];
}
+/* Do a logical vht_capa &= vht_capa_mask. */
+void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
+ const struct ieee80211_vht_cap *vht_capa_mask)
+{
+ int i;
+ u8 *p1, *p2;
+ if (!vht_capa_mask) {
+ memset(vht_capa, 0, sizeof(*vht_capa));
+ return;
+ }
+
+ p1 = (u8*)(vht_capa);
+ p2 = (u8*)(vht_capa_mask);
+ for (i = 0; i < sizeof(*vht_capa); i++)
+ p1[i] &= p2[i];
+}
+
int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct ieee80211_channel *chan,
- const u8 *bssid, const u8 *prev_bssid,
+ const u8 *bssid,
const u8 *ssid, int ssid_len,
- const u8 *ie, int ie_len, bool use_mfp,
- struct cfg80211_crypto_settings *crypt,
- u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
- struct ieee80211_ht_cap *ht_capa_mask)
+ struct cfg80211_assoc_request *req)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_assoc_request req;
int err;
bool was_connected = false;
ASSERT_WDEV_LOCK(wdev);
- memset(&req, 0, sizeof(req));
-
- if (wdev->current_bss && prev_bssid &&
- ether_addr_equal(wdev->current_bss->pub.bssid, prev_bssid)) {
+ if (wdev->current_bss && req->prev_bssid &&
+ ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) {
/*
* Trying to reassociate: Allow this to proceed and let the old
* association to be dropped when the new one is completed.
@@ -399,40 +386,30 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
} else if (wdev->current_bss)
return -EALREADY;
- req.ie = ie;
- req.ie_len = ie_len;
- memcpy(&req.crypto, crypt, sizeof(req.crypto));
- req.use_mfp = use_mfp;
- req.prev_bssid = prev_bssid;
- req.flags = assoc_flags;
- if (ht_capa)
- memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa));
- if (ht_capa_mask)
- memcpy(&req.ht_capa_mask, ht_capa_mask,
- sizeof(req.ht_capa_mask));
- cfg80211_oper_and_ht_capa(&req.ht_capa_mask,
+ cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
+ cfg80211_oper_and_vht_capa(&req->vht_capa_mask,
+ rdev->wiphy.vht_capa_mod_mask);
- req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
- WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
- if (!req.bss) {
+ req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
+ WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
+ if (!req->bss) {
if (was_connected)
wdev->sme_state = CFG80211_SME_CONNECTED;
return -ENOENT;
}
- err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel,
- CHAN_MODE_SHARED);
+ err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
if (err)
goto out;
- err = rdev_assoc(rdev, dev, &req);
+ err = rdev_assoc(rdev, dev, req);
out:
if (err) {
if (was_connected)
wdev->sme_state = CFG80211_SME_CONNECTED;
- cfg80211_put_bss(&rdev->wiphy, req.bss);
+ cfg80211_put_bss(&rdev->wiphy, req->bss);
}
return err;
@@ -441,21 +418,17 @@ out:
int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct ieee80211_channel *chan,
- const u8 *bssid, const u8 *prev_bssid,
+ const u8 *bssid,
const u8 *ssid, int ssid_len,
- const u8 *ie, int ie_len, bool use_mfp,
- struct cfg80211_crypto_settings *crypt,
- u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
- struct ieee80211_ht_cap *ht_capa_mask)
+ struct cfg80211_assoc_request *req)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
mutex_lock(&rdev->devlist_mtx);
wdev_lock(wdev);
- err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
- ssid, ssid_len, ie, ie_len, use_mfp, crypt,
- assoc_flags, ht_capa, ht_capa_mask);
+ err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid,
+ ssid, ssid_len, req);
wdev_unlock(wdev);
mutex_unlock(&rdev->devlist_mtx);
@@ -577,62 +550,6 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
}
}
-void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
- struct ieee80211_channel *chan,
- unsigned int duration, gfp_t gfp)
-{
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
- nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp);
-}
-EXPORT_SYMBOL(cfg80211_ready_on_channel);
-
-void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
- struct ieee80211_channel *chan,
- gfp_t gfp)
-{
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
- nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp);
-}
-EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
-
-void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
- struct station_info *sinfo, gfp_t gfp)
-{
- struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_new_sta(dev, mac_addr, sinfo);
- nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp);
-}
-EXPORT_SYMBOL(cfg80211_new_sta);
-
-void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
-{
- struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_del_sta(dev, mac_addr);
- nl80211_send_sta_del_event(rdev, dev, mac_addr, gfp);
-}
-EXPORT_SYMBOL(cfg80211_del_sta);
-
-void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
- enum nl80211_connect_failed_reason reason,
- gfp_t gfp)
-{
- struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp);
-}
-EXPORT_SYMBOL(cfg80211_conn_failed);
-
struct cfg80211_mgmt_registration {
struct list_head list;
@@ -731,6 +648,11 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ if (nlportid && rdev->crit_proto_nlportid == nlportid) {
+ rdev->crit_proto_nlportid = 0;
+ rdev_crit_proto_stop(rdev, wdev);
+ }
+
if (nlportid == wdev->ap_unexpected_nlportid)
wdev->ap_unexpected_nlportid = 0;
}
@@ -909,85 +831,6 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
}
EXPORT_SYMBOL(cfg80211_rx_mgmt);
-void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
- const u8 *buf, size_t len, bool ack, gfp_t gfp)
-{
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
-
- /* Indicate TX status of the Action frame to user space */
- nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp);
-}
-EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
-
-void cfg80211_cqm_rssi_notify(struct net_device *dev,
- enum nl80211_cqm_rssi_threshold_event rssi_event,
- gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_cqm_rssi_notify(dev, rssi_event);
-
- /* Indicate roaming trigger event to user space */
- nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
-}
-EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
-
-void cfg80211_cqm_pktloss_notify(struct net_device *dev,
- const u8 *peer, u32 num_packets, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);
-
- /* Indicate roaming trigger event to user space */
- nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp);
-}
-EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
-
-void cfg80211_cqm_txe_notify(struct net_device *dev,
- const u8 *peer, u32 num_packets,
- u32 rate, u32 intvl, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets,
- rate, intvl, gfp);
-}
-EXPORT_SYMBOL(cfg80211_cqm_txe_notify);
-
-void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
- const u8 *replay_ctr, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_gtk_rekey_notify(dev, bssid);
- nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
-}
-EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);
-
-void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
- const u8 *bssid, bool preauth, gfp_t gfp)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
- nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
-}
-EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
-
void cfg80211_dfs_channels_update_work(struct work_struct *work)
{
struct delayed_work *delayed_work;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 58e13a8c95f9..b14b7e3cb6e6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -370,6 +370,14 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
[NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, },
+ [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, },
+ [NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG },
+ [NL80211_ATTR_VHT_CAPABILITY_MASK] = {
+ .len = NL80211_VHT_CAPABILITY_LEN,
+ },
+ [NL80211_ATTR_MDID] = { .type = NLA_U16 },
+ [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
+ .len = IEEE80211_MAX_DATA_LEN },
};
/* policy for the key attributes */
@@ -439,62 +447,69 @@ nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
};
-/* ifidx get helper */
-static int nl80211_get_ifidx(struct netlink_callback *cb)
+static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct cfg80211_registered_device **rdev,
+ struct wireless_dev **wdev)
{
- int res;
-
- res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- nl80211_fam.attrbuf, nl80211_fam.maxattr,
- nl80211_policy);
- if (res)
- return res;
-
- if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
- return -EINVAL;
+ int err;
- res = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
- if (!res)
- return -EINVAL;
- return res;
-}
+ rtnl_lock();
+ mutex_lock(&cfg80211_mutex);
-static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct cfg80211_registered_device **rdev,
- struct net_device **dev)
-{
- int ifidx = cb->args[0];
- int err;
+ if (!cb->args[0]) {
+ err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+ nl80211_fam.attrbuf, nl80211_fam.maxattr,
+ nl80211_policy);
+ if (err)
+ goto out_unlock;
- if (!ifidx)
- ifidx = nl80211_get_ifidx(cb);
- if (ifidx < 0)
- return ifidx;
+ *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
+ nl80211_fam.attrbuf);
+ if (IS_ERR(*wdev)) {
+ err = PTR_ERR(*wdev);
+ goto out_unlock;
+ }
+ *rdev = wiphy_to_dev((*wdev)->wiphy);
+ cb->args[0] = (*rdev)->wiphy_idx;
+ cb->args[1] = (*wdev)->identifier;
+ } else {
+ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]);
+ struct wireless_dev *tmp;
- cb->args[0] = ifidx;
+ if (!wiphy) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ *rdev = wiphy_to_dev(wiphy);
+ *wdev = NULL;
- rtnl_lock();
+ mutex_lock(&(*rdev)->devlist_mtx);
+ list_for_each_entry(tmp, &(*rdev)->wdev_list, list) {
+ if (tmp->identifier == cb->args[1]) {
+ *wdev = tmp;
+ break;
+ }
+ }
+ mutex_unlock(&(*rdev)->devlist_mtx);
- *dev = __dev_get_by_index(sock_net(skb->sk), ifidx);
- if (!*dev) {
- err = -ENODEV;
- goto out_rtnl;
+ if (!*wdev) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
}
- *rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
- if (IS_ERR(*rdev)) {
- err = PTR_ERR(*rdev);
- goto out_rtnl;
- }
+ cfg80211_lock_rdev(*rdev);
+ mutex_unlock(&cfg80211_mutex);
return 0;
- out_rtnl:
+ out_unlock:
+ mutex_unlock(&cfg80211_mutex);
rtnl_unlock();
return err;
}
-static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev)
+static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)
{
cfg80211_unlock_rdev(rdev);
rtnl_unlock();
@@ -539,7 +554,8 @@ static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
}
static int nl80211_msg_put_channel(struct sk_buff *msg,
- struct ieee80211_channel *chan)
+ struct ieee80211_channel *chan,
+ bool large)
{
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,
chan->center_freq))
@@ -554,9 +570,37 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
if ((chan->flags & IEEE80211_CHAN_NO_IBSS) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS))
goto nla_put_failure;
- if ((chan->flags & IEEE80211_CHAN_RADAR) &&
- nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR))
- goto nla_put_failure;
+ if (chan->flags & IEEE80211_CHAN_RADAR) {
+ if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR))
+ goto nla_put_failure;
+ if (large) {
+ u32 time;
+
+ time = elapsed_jiffies_msecs(chan->dfs_state_entered);
+
+ if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE,
+ chan->dfs_state))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME,
+ time))
+ goto nla_put_failure;
+ }
+ }
+
+ if (large) {
+ if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
+ goto nla_put_failure;
+ }
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
DBM_TO_MBM(chan->max_power)))
@@ -832,7 +876,8 @@ nla_put_failure:
}
static int nl80211_put_iface_combinations(struct wiphy *wiphy,
- struct sk_buff *msg)
+ struct sk_buff *msg,
+ bool large)
{
struct nlattr *nl_combis;
int i, j;
@@ -881,6 +926,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM,
c->max_interfaces))
goto nla_put_failure;
+ if (large &&
+ nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+ c->radar_detect_widths))
+ goto nla_put_failure;
nla_nest_end(msg, nl_combi);
}
@@ -892,412 +941,615 @@ nla_put_failure:
return -ENOBUFS;
}
-static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags,
- struct cfg80211_registered_device *dev)
+#ifdef CONFIG_PM
+static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev,
+ struct sk_buff *msg)
{
- void *hdr;
- struct nlattr *nl_bands, *nl_band;
- struct nlattr *nl_freqs, *nl_freq;
- struct nlattr *nl_rates, *nl_rate;
- struct nlattr *nl_cmds;
- enum ieee80211_band band;
- struct ieee80211_channel *chan;
- struct ieee80211_rate *rate;
- int i;
- const struct ieee80211_txrx_stypes *mgmt_stypes =
- dev->wiphy.mgmt_stypes;
+ const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp;
+ struct nlattr *nl_tcp;
- hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
- if (!hdr)
- return -1;
+ if (!tcp)
+ return 0;
- if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
- nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) ||
- nla_put_u32(msg, NL80211_ATTR_GENERATION,
- cfg80211_rdev_list_generation) ||
- nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
- dev->wiphy.retry_short) ||
- nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
- dev->wiphy.retry_long) ||
- nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
- dev->wiphy.frag_threshold) ||
- nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
- dev->wiphy.rts_threshold) ||
- nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
- dev->wiphy.coverage_class) ||
- nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
- dev->wiphy.max_scan_ssids) ||
- nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
- dev->wiphy.max_sched_scan_ssids) ||
- nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
- dev->wiphy.max_scan_ie_len) ||
- nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
- dev->wiphy.max_sched_scan_ie_len) ||
- nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
- dev->wiphy.max_match_sets))
- goto nla_put_failure;
+ nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION);
+ if (!nl_tcp)
+ return -ENOBUFS;
- if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
- nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
- goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
- nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
- goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
- nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
- goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
- nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
- goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
- nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
- goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
- nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
- goto nla_put_failure;
+ if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD,
+ tcp->data_payload_max))
+ return -ENOBUFS;
- if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
- sizeof(u32) * dev->wiphy.n_cipher_suites,
- dev->wiphy.cipher_suites))
- goto nla_put_failure;
+ if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD,
+ tcp->data_payload_max))
+ return -ENOBUFS;
- if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
- dev->wiphy.max_num_pmkids))
- goto nla_put_failure;
+ if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ))
+ return -ENOBUFS;
- if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
- nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
- goto nla_put_failure;
+ if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN,
+ sizeof(*tcp->tok), tcp->tok))
+ return -ENOBUFS;
- if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
- dev->wiphy.available_antennas_tx) ||
- nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
- dev->wiphy.available_antennas_rx))
- goto nla_put_failure;
+ if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL,
+ tcp->data_interval_max))
+ return -ENOBUFS;
- if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
- nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
- dev->wiphy.probe_resp_offload))
- goto nla_put_failure;
+ if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD,
+ tcp->wake_payload_max))
+ return -ENOBUFS;
- if ((dev->wiphy.available_antennas_tx ||
- dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
- u32 tx_ant = 0, rx_ant = 0;
- int res;
- res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
- if (!res) {
- if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX,
- tx_ant) ||
- nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX,
- rx_ant))
- goto nla_put_failure;
- }
+ nla_nest_end(msg, nl_tcp);
+ return 0;
+}
+
+static int nl80211_send_wowlan(struct sk_buff *msg,
+ struct cfg80211_registered_device *dev,
+ bool large)
+{
+ struct nlattr *nl_wowlan;
+
+ if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns)
+ return 0;
+
+ nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
+ if (!nl_wowlan)
+ return -ENOBUFS;
+
+ if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
+ ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
+ ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
+ ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
+ ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
+ ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
+ ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
+ ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
+ nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
+ return -ENOBUFS;
+
+ if (dev->wiphy.wowlan.n_patterns) {
+ struct nl80211_wowlan_pattern_support pat = {
+ .max_patterns = dev->wiphy.wowlan.n_patterns,
+ .min_pattern_len = dev->wiphy.wowlan.pattern_min_len,
+ .max_pattern_len = dev->wiphy.wowlan.pattern_max_len,
+ .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset,
+ };
+
+ if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
+ sizeof(pat), &pat))
+ return -ENOBUFS;
}
- if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
- dev->wiphy.interface_modes))
- goto nla_put_failure;
+ if (large && nl80211_send_wowlan_tcp_caps(dev, msg))
+ return -ENOBUFS;
- nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
- if (!nl_bands)
- goto nla_put_failure;
+ nla_nest_end(msg, nl_wowlan);
- for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
- if (!dev->wiphy.bands[band])
- continue;
+ return 0;
+}
+#endif
- nl_band = nla_nest_start(msg, band);
- if (!nl_band)
- goto nla_put_failure;
+static int nl80211_send_band_rateinfo(struct sk_buff *msg,
+ struct ieee80211_supported_band *sband)
+{
+ struct nlattr *nl_rates, *nl_rate;
+ struct ieee80211_rate *rate;
+ int i;
- /* add HT info */
- if (dev->wiphy.bands[band]->ht_cap.ht_supported &&
- (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET,
- sizeof(dev->wiphy.bands[band]->ht_cap.mcs),
- &dev->wiphy.bands[band]->ht_cap.mcs) ||
- nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA,
- dev->wiphy.bands[band]->ht_cap.cap) ||
- nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
- dev->wiphy.bands[band]->ht_cap.ampdu_factor) ||
- nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
- dev->wiphy.bands[band]->ht_cap.ampdu_density)))
- goto nla_put_failure;
+ /* add HT info */
+ if (sband->ht_cap.ht_supported &&
+ (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET,
+ sizeof(sband->ht_cap.mcs),
+ &sband->ht_cap.mcs) ||
+ nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA,
+ sband->ht_cap.cap) ||
+ nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
+ sband->ht_cap.ampdu_factor) ||
+ nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
+ sband->ht_cap.ampdu_density)))
+ return -ENOBUFS;
- /* add VHT info */
- if (dev->wiphy.bands[band]->vht_cap.vht_supported &&
- (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET,
- sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs),
- &dev->wiphy.bands[band]->vht_cap.vht_mcs) ||
- nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA,
- dev->wiphy.bands[band]->vht_cap.cap)))
- goto nla_put_failure;
+ /* add VHT info */
+ if (sband->vht_cap.vht_supported &&
+ (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET,
+ sizeof(sband->vht_cap.vht_mcs),
+ &sband->vht_cap.vht_mcs) ||
+ nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA,
+ sband->vht_cap.cap)))
+ return -ENOBUFS;
- /* add frequencies */
- nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS);
- if (!nl_freqs)
- goto nla_put_failure;
+ /* add bitrates */
+ nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
+ if (!nl_rates)
+ return -ENOBUFS;
- for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) {
- nl_freq = nla_nest_start(msg, i);
- if (!nl_freq)
- goto nla_put_failure;
+ for (i = 0; i < sband->n_bitrates; i++) {
+ nl_rate = nla_nest_start(msg, i);
+ if (!nl_rate)
+ return -ENOBUFS;
- chan = &dev->wiphy.bands[band]->channels[i];
+ rate = &sband->bitrates[i];
+ if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE,
+ rate->bitrate))
+ return -ENOBUFS;
+ if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
+ nla_put_flag(msg,
+ NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
+ return -ENOBUFS;
- if (nl80211_msg_put_channel(msg, chan))
- goto nla_put_failure;
+ nla_nest_end(msg, nl_rate);
+ }
- nla_nest_end(msg, nl_freq);
- }
+ nla_nest_end(msg, nl_rates);
- nla_nest_end(msg, nl_freqs);
+ return 0;
+}
- /* add bitrates */
- nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
- if (!nl_rates)
- goto nla_put_failure;
+static int
+nl80211_send_mgmt_stypes(struct sk_buff *msg,
+ const struct ieee80211_txrx_stypes *mgmt_stypes)
+{
+ u16 stypes;
+ struct nlattr *nl_ftypes, *nl_ifs;
+ enum nl80211_iftype ift;
+ int i;
- for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) {
- nl_rate = nla_nest_start(msg, i);
- if (!nl_rate)
- goto nla_put_failure;
+ if (!mgmt_stypes)
+ return 0;
- rate = &dev->wiphy.bands[band]->bitrates[i];
- if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE,
- rate->bitrate))
- goto nla_put_failure;
- if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
- nla_put_flag(msg,
- NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
- goto nla_put_failure;
+ nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
+ if (!nl_ifs)
+ return -ENOBUFS;
- nla_nest_end(msg, nl_rate);
+ for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
+ nl_ftypes = nla_nest_start(msg, ift);
+ if (!nl_ftypes)
+ return -ENOBUFS;
+ i = 0;
+ stypes = mgmt_stypes[ift].tx;
+ while (stypes) {
+ if ((stypes & 1) &&
+ nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
+ (i << 4) | IEEE80211_FTYPE_MGMT))
+ return -ENOBUFS;
+ stypes >>= 1;
+ i++;
}
+ nla_nest_end(msg, nl_ftypes);
+ }
+
+ nla_nest_end(msg, nl_ifs);
- nla_nest_end(msg, nl_rates);
+ nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
+ if (!nl_ifs)
+ return -ENOBUFS;
- nla_nest_end(msg, nl_band);
+ for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
+ nl_ftypes = nla_nest_start(msg, ift);
+ if (!nl_ftypes)
+ return -ENOBUFS;
+ i = 0;
+ stypes = mgmt_stypes[ift].rx;
+ while (stypes) {
+ if ((stypes & 1) &&
+ nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
+ (i << 4) | IEEE80211_FTYPE_MGMT))
+ return -ENOBUFS;
+ stypes >>= 1;
+ i++;
+ }
+ nla_nest_end(msg, nl_ftypes);
}
- nla_nest_end(msg, nl_bands);
+ nla_nest_end(msg, nl_ifs);
- nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
- if (!nl_cmds)
- goto nla_put_failure;
+ return 0;
+}
- i = 0;
-#define CMD(op, n) \
- do { \
- if (dev->ops->op) { \
- i++; \
- if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
- goto nla_put_failure; \
- } \
- } while (0)
-
- CMD(add_virtual_intf, NEW_INTERFACE);
- CMD(change_virtual_intf, SET_INTERFACE);
- CMD(add_key, NEW_KEY);
- CMD(start_ap, START_AP);
- CMD(add_station, NEW_STATION);
- CMD(add_mpath, NEW_MPATH);
- CMD(update_mesh_config, SET_MESH_CONFIG);
- CMD(change_bss, SET_BSS);
- CMD(auth, AUTHENTICATE);
- CMD(assoc, ASSOCIATE);
- CMD(deauth, DEAUTHENTICATE);
- CMD(disassoc, DISASSOCIATE);
- CMD(join_ibss, JOIN_IBSS);
- CMD(join_mesh, JOIN_MESH);
- CMD(set_pmksa, SET_PMKSA);
- CMD(del_pmksa, DEL_PMKSA);
- CMD(flush_pmksa, FLUSH_PMKSA);
- if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
- CMD(remain_on_channel, REMAIN_ON_CHANNEL);
- CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
- CMD(mgmt_tx, FRAME);
- CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
- if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
- goto nla_put_failure;
+static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
+ struct sk_buff *msg, u32 portid, u32 seq,
+ int flags, bool split, long *split_start,
+ long *band_start, long *chan_start)
+{
+ void *hdr;
+ struct nlattr *nl_bands, *nl_band;
+ struct nlattr *nl_freqs, *nl_freq;
+ struct nlattr *nl_cmds;
+ enum ieee80211_band band;
+ struct ieee80211_channel *chan;
+ int i;
+ const struct ieee80211_txrx_stypes *mgmt_stypes =
+ dev->wiphy.mgmt_stypes;
+ long start = 0, start_chan = 0, start_band = 0;
+ u32 features;
+
+ hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
+ if (!hdr)
+ return -ENOBUFS;
+
+ /* allow always using the variables */
+ if (!split) {
+ split_start = &start;
+ band_start = &start_band;
+ chan_start = &start_chan;
}
- if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
- dev->ops->join_mesh) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
+ nla_put_string(msg, NL80211_ATTR_WIPHY_NAME,
+ wiphy_name(&dev->wiphy)) ||
+ nla_put_u32(msg, NL80211_ATTR_GENERATION,
+ cfg80211_rdev_list_generation))
+ goto nla_put_failure;
+
+ switch (*split_start) {
+ case 0:
+ if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
+ dev->wiphy.retry_short) ||
+ nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
+ dev->wiphy.retry_long) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+ dev->wiphy.frag_threshold) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
+ dev->wiphy.rts_threshold) ||
+ nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
+ dev->wiphy.coverage_class) ||
+ nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
+ dev->wiphy.max_scan_ssids) ||
+ nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
+ dev->wiphy.max_sched_scan_ssids) ||
+ nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
+ dev->wiphy.max_scan_ie_len) ||
+ nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
+ dev->wiphy.max_sched_scan_ie_len) ||
+ nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
+ dev->wiphy.max_match_sets))
goto nla_put_failure;
- }
- CMD(set_wds_peer, SET_WDS_PEER);
- if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
- CMD(tdls_mgmt, TDLS_MGMT);
- CMD(tdls_oper, TDLS_OPER);
- }
- if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
- CMD(sched_scan_start, START_SCHED_SCAN);
- CMD(probe_client, PROBE_CLIENT);
- CMD(set_noack_map, SET_NOACK_MAP);
- if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
+
+ if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
+ nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
+ goto nla_put_failure;
+ if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
+ nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
+ goto nla_put_failure;
+ if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
+ nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
+ goto nla_put_failure;
+ if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
+ nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
+ goto nla_put_failure;
+ if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
+ nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
+ goto nla_put_failure;
+ if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
+ nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
goto nla_put_failure;
- }
- CMD(start_p2p_device, START_P2P_DEVICE);
- CMD(set_mcast_rate, SET_MCAST_RATE);
-#ifdef CONFIG_NL80211_TESTMODE
- CMD(testmode_cmd, TESTMODE);
-#endif
+ (*split_start)++;
+ if (split)
+ break;
+ case 1:
+ if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
+ sizeof(u32) * dev->wiphy.n_cipher_suites,
+ dev->wiphy.cipher_suites))
+ goto nla_put_failure;
-#undef CMD
+ if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
+ dev->wiphy.max_num_pmkids))
+ goto nla_put_failure;
- if (dev->ops->connect || dev->ops->auth) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
+ if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
+ nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
goto nla_put_failure;
- }
- if (dev->ops->disconnect || dev->ops->deauth) {
- i++;
- if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+ dev->wiphy.available_antennas_tx) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+ dev->wiphy.available_antennas_rx))
goto nla_put_failure;
- }
- nla_nest_end(msg, nl_cmds);
+ if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
+ nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
+ dev->wiphy.probe_resp_offload))
+ goto nla_put_failure;
- if (dev->ops->remain_on_channel &&
- (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
- nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
- dev->wiphy.max_remain_on_channel_duration))
- goto nla_put_failure;
+ if ((dev->wiphy.available_antennas_tx ||
+ dev->wiphy.available_antennas_rx) &&
+ dev->ops->get_antenna) {
+ u32 tx_ant = 0, rx_ant = 0;
+ int res;
+ res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
+ if (!res) {
+ if (nla_put_u32(msg,
+ NL80211_ATTR_WIPHY_ANTENNA_TX,
+ tx_ant) ||
+ nla_put_u32(msg,
+ NL80211_ATTR_WIPHY_ANTENNA_RX,
+ rx_ant))
+ goto nla_put_failure;
+ }
+ }
- if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
- nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
- goto nla_put_failure;
+ (*split_start)++;
+ if (split)
+ break;
+ case 2:
+ if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
+ dev->wiphy.interface_modes))
+ goto nla_put_failure;
+ (*split_start)++;
+ if (split)
+ break;
+ case 3:
+ nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
+ if (!nl_bands)
+ goto nla_put_failure;
- if (mgmt_stypes) {
- u16 stypes;
- struct nlattr *nl_ftypes, *nl_ifs;
- enum nl80211_iftype ift;
+ for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) {
+ struct ieee80211_supported_band *sband;
- nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
- if (!nl_ifs)
- goto nla_put_failure;
+ sband = dev->wiphy.bands[band];
- for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
- nl_ftypes = nla_nest_start(msg, ift);
- if (!nl_ftypes)
+ if (!sband)
+ continue;
+
+ nl_band = nla_nest_start(msg, band);
+ if (!nl_band)
goto nla_put_failure;
- i = 0;
- stypes = mgmt_stypes[ift].tx;
- while (stypes) {
- if ((stypes & 1) &&
- nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
- (i << 4) | IEEE80211_FTYPE_MGMT))
+
+ switch (*chan_start) {
+ case 0:
+ if (nl80211_send_band_rateinfo(msg, sband))
goto nla_put_failure;
- stypes >>= 1;
- i++;
+ (*chan_start)++;
+ if (split)
+ break;
+ default:
+ /* add frequencies */
+ nl_freqs = nla_nest_start(
+ msg, NL80211_BAND_ATTR_FREQS);
+ if (!nl_freqs)
+ goto nla_put_failure;
+
+ for (i = *chan_start - 1;
+ i < sband->n_channels;
+ i++) {
+ nl_freq = nla_nest_start(msg, i);
+ if (!nl_freq)
+ goto nla_put_failure;
+
+ chan = &sband->channels[i];
+
+ if (nl80211_msg_put_channel(msg, chan,
+ split))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nl_freq);
+ if (split)
+ break;
+ }
+ if (i < sband->n_channels)
+ *chan_start = i + 2;
+ else
+ *chan_start = 0;
+ nla_nest_end(msg, nl_freqs);
+ }
+
+ nla_nest_end(msg, nl_band);
+
+ if (split) {
+ /* start again here */
+ if (*chan_start)
+ band--;
+ break;
}
- nla_nest_end(msg, nl_ftypes);
}
+ nla_nest_end(msg, nl_bands);
- nla_nest_end(msg, nl_ifs);
+ if (band < IEEE80211_NUM_BANDS)
+ *band_start = band + 1;
+ else
+ *band_start = 0;
- nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
- if (!nl_ifs)
+ /* if bands & channels are done, continue outside */
+ if (*band_start == 0 && *chan_start == 0)
+ (*split_start)++;
+ if (split)
+ break;
+ case 4:
+ nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
+ if (!nl_cmds)
goto nla_put_failure;
- for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
- nl_ftypes = nla_nest_start(msg, ift);
- if (!nl_ftypes)
+ i = 0;
+#define CMD(op, n) \
+ do { \
+ if (dev->ops->op) { \
+ i++; \
+ if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
+ goto nla_put_failure; \
+ } \
+ } while (0)
+
+ CMD(add_virtual_intf, NEW_INTERFACE);
+ CMD(change_virtual_intf, SET_INTERFACE);
+ CMD(add_key, NEW_KEY);
+ CMD(start_ap, START_AP);
+ CMD(add_station, NEW_STATION);
+ CMD(add_mpath, NEW_MPATH);
+ CMD(update_mesh_config, SET_MESH_CONFIG);
+ CMD(change_bss, SET_BSS);
+ CMD(auth, AUTHENTICATE);
+ CMD(assoc, ASSOCIATE);
+ CMD(deauth, DEAUTHENTICATE);
+ CMD(disassoc, DISASSOCIATE);
+ CMD(join_ibss, JOIN_IBSS);
+ CMD(join_mesh, JOIN_MESH);
+ CMD(set_pmksa, SET_PMKSA);
+ CMD(del_pmksa, DEL_PMKSA);
+ CMD(flush_pmksa, FLUSH_PMKSA);
+ if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
+ CMD(remain_on_channel, REMAIN_ON_CHANNEL);
+ CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
+ CMD(mgmt_tx, FRAME);
+ CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
+ if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
goto nla_put_failure;
- i = 0;
- stypes = mgmt_stypes[ift].rx;
- while (stypes) {
- if ((stypes & 1) &&
- nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
- (i << 4) | IEEE80211_FTYPE_MGMT))
- goto nla_put_failure;
- stypes >>= 1;
- i++;
- }
- nla_nest_end(msg, nl_ftypes);
}
- nla_nest_end(msg, nl_ifs);
- }
+ if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
+ dev->ops->join_mesh) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
+ goto nla_put_failure;
+ }
+ CMD(set_wds_peer, SET_WDS_PEER);
+ if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
+ CMD(tdls_mgmt, TDLS_MGMT);
+ CMD(tdls_oper, TDLS_OPER);
+ }
+ if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+ CMD(sched_scan_start, START_SCHED_SCAN);
+ CMD(probe_client, PROBE_CLIENT);
+ CMD(set_noack_map, SET_NOACK_MAP);
+ if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
+ goto nla_put_failure;
+ }
+ CMD(start_p2p_device, START_P2P_DEVICE);
+ CMD(set_mcast_rate, SET_MCAST_RATE);
+ if (split) {
+ CMD(crit_proto_start, CRIT_PROTOCOL_START);
+ CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
+ }
-#ifdef CONFIG_PM
- if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) {
- struct nlattr *nl_wowlan;
+#ifdef CONFIG_NL80211_TESTMODE
+ CMD(testmode_cmd, TESTMODE);
+#endif
- nl_wowlan = nla_nest_start(msg,
- NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
- if (!nl_wowlan)
- goto nla_put_failure;
+#undef CMD
- if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
- ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
- ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
- ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
- ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
- ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
- ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
- ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
- nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
- goto nla_put_failure;
- if (dev->wiphy.wowlan.n_patterns) {
- struct nl80211_wowlan_pattern_support pat = {
- .max_patterns = dev->wiphy.wowlan.n_patterns,
- .min_pattern_len =
- dev->wiphy.wowlan.pattern_min_len,
- .max_pattern_len =
- dev->wiphy.wowlan.pattern_max_len,
- .max_pkt_offset =
- dev->wiphy.wowlan.max_pkt_offset,
- };
- if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
- sizeof(pat), &pat))
+ if (dev->ops->connect || dev->ops->auth) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
goto nla_put_failure;
}
- nla_nest_end(msg, nl_wowlan);
- }
+ if (dev->ops->disconnect || dev->ops->deauth) {
+ i++;
+ if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(msg, nl_cmds);
+ (*split_start)++;
+ if (split)
+ break;
+ case 5:
+ if (dev->ops->remain_on_channel &&
+ (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
+ nla_put_u32(msg,
+ NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+ dev->wiphy.max_remain_on_channel_duration))
+ goto nla_put_failure;
+
+ if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
+ nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
+ goto nla_put_failure;
+
+ if (nl80211_send_mgmt_stypes(msg, mgmt_stypes))
+ goto nla_put_failure;
+ (*split_start)++;
+ if (split)
+ break;
+ case 6:
+#ifdef CONFIG_PM
+ if (nl80211_send_wowlan(msg, dev, split))
+ goto nla_put_failure;
+ (*split_start)++;
+ if (split)
+ break;
+#else
+ (*split_start)++;
#endif
+ case 7:
+ if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
+ dev->wiphy.software_iftypes))
+ goto nla_put_failure;
- if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
- dev->wiphy.software_iftypes))
- goto nla_put_failure;
+ if (nl80211_put_iface_combinations(&dev->wiphy, msg, split))
+ goto nla_put_failure;
- if (nl80211_put_iface_combinations(&dev->wiphy, msg))
- goto nla_put_failure;
+ (*split_start)++;
+ if (split)
+ break;
+ case 8:
+ if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
+ nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
+ dev->wiphy.ap_sme_capa))
+ goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
- nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
- dev->wiphy.ap_sme_capa))
- goto nla_put_failure;
+ features = dev->wiphy.features;
+ /*
+ * We can only add the per-channel limit information if the
+ * dump is split, otherwise it makes it too big. Therefore
+ * only advertise it in that case.
+ */
+ if (split)
+ features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS;
+ if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features))
+ goto nla_put_failure;
- if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS,
- dev->wiphy.features))
- goto nla_put_failure;
+ if (dev->wiphy.ht_capa_mod_mask &&
+ nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
+ sizeof(*dev->wiphy.ht_capa_mod_mask),
+ dev->wiphy.ht_capa_mod_mask))
+ goto nla_put_failure;
- if (dev->wiphy.ht_capa_mod_mask &&
- nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
- sizeof(*dev->wiphy.ht_capa_mod_mask),
- dev->wiphy.ht_capa_mod_mask))
- goto nla_put_failure;
+ if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
+ dev->wiphy.max_acl_mac_addrs &&
+ nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
+ dev->wiphy.max_acl_mac_addrs))
+ goto nla_put_failure;
- if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
- dev->wiphy.max_acl_mac_addrs &&
- nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
- dev->wiphy.max_acl_mac_addrs))
- goto nla_put_failure;
+ /*
+ * Any information below this point is only available to
+ * applications that can deal with it being split. This
+ * helps ensure that newly added capabilities don't break
+ * older tools by overrunning their buffers.
+ *
+ * We still increment split_start so that in the split
+ * case we'll continue with more data in the next round,
+ * but break unconditionally so unsplit data stops here.
+ */
+ (*split_start)++;
+ break;
+ case 9:
+ if (dev->wiphy.extended_capabilities &&
+ (nla_put(msg, NL80211_ATTR_EXT_CAPA,
+ dev->wiphy.extended_capabilities_len,
+ dev->wiphy.extended_capabilities) ||
+ nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK,
+ dev->wiphy.extended_capabilities_len,
+ dev->wiphy.extended_capabilities_mask)))
+ goto nla_put_failure;
+
+ if (dev->wiphy.vht_capa_mod_mask &&
+ nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK,
+ sizeof(*dev->wiphy.vht_capa_mod_mask),
+ dev->wiphy.vht_capa_mod_mask))
+ goto nla_put_failure;
+ /* done */
+ *split_start = 0;
+ break;
+ }
return genlmsg_end(msg, hdr);
nla_put_failure:
@@ -1310,39 +1562,87 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
int idx = 0, ret;
int start = cb->args[0];
struct cfg80211_registered_device *dev;
+ s64 filter_wiphy = -1;
+ bool split = false;
+ struct nlattr **tb;
+ int res;
+
+ /* will be zeroed in nlmsg_parse() */
+ tb = kmalloc(sizeof(*tb) * (NL80211_ATTR_MAX + 1), GFP_KERNEL);
+ if (!tb)
+ return -ENOMEM;
mutex_lock(&cfg80211_mutex);
+ res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+ tb, NL80211_ATTR_MAX, nl80211_policy);
+ if (res == 0) {
+ split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP];
+ if (tb[NL80211_ATTR_WIPHY])
+ filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]);
+ if (tb[NL80211_ATTR_WDEV])
+ filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32;
+ if (tb[NL80211_ATTR_IFINDEX]) {
+ struct net_device *netdev;
+ int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]);
+
+ netdev = dev_get_by_index(sock_net(skb->sk), ifidx);
+ if (!netdev) {
+ mutex_unlock(&cfg80211_mutex);
+ kfree(tb);
+ return -ENODEV;
+ }
+ if (netdev->ieee80211_ptr) {
+ dev = wiphy_to_dev(
+ netdev->ieee80211_ptr->wiphy);
+ filter_wiphy = dev->wiphy_idx;
+ }
+ dev_put(netdev);
+ }
+ }
+ kfree(tb);
+
list_for_each_entry(dev, &cfg80211_rdev_list, list) {
if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))
continue;
if (++idx <= start)
continue;
- ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- dev);
- if (ret < 0) {
- /*
- * If sending the wiphy data didn't fit (ENOBUFS or
- * EMSGSIZE returned), this SKB is still empty (so
- * it's not too big because another wiphy dataset is
- * already in the skb) and we've not tried to adjust
- * the dump allocation yet ... then adjust the alloc
- * size to be bigger, and return 1 but with the empty
- * skb. This results in an empty message being RX'ed
- * in userspace, but that is ignored.
- *
- * We can then retry with the larger buffer.
- */
- if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
- !skb->len &&
- cb->min_dump_alloc < 4096) {
- cb->min_dump_alloc = 4096;
- mutex_unlock(&cfg80211_mutex);
- return 1;
+ if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy)
+ continue;
+ /* attempt to fit multiple wiphy data chunks into the skb */
+ do {
+ ret = nl80211_send_wiphy(dev, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ split, &cb->args[1],
+ &cb->args[2],
+ &cb->args[3]);
+ if (ret < 0) {
+ /*
+ * If sending the wiphy data didn't fit (ENOBUFS
+ * or EMSGSIZE returned), this SKB is still
+ * empty (so it's not too big because another
+ * wiphy dataset is already in the skb) and
+ * we've not tried to adjust the dump allocation
+ * yet ... then adjust the alloc size to be
+ * bigger, and return 1 but with the empty skb.
+ * This results in an empty message being RX'ed
+ * in userspace, but that is ignored.
+ *
+ * We can then retry with the larger buffer.
+ */
+ if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
+ !skb->len &&
+ cb->min_dump_alloc < 4096) {
+ cb->min_dump_alloc = 4096;
+ mutex_unlock(&cfg80211_mutex);
+ return 1;
+ }
+ idx--;
+ break;
}
- idx--;
- break;
- }
+ } while (cb->args[1] > 0);
+ break;
}
mutex_unlock(&cfg80211_mutex);
@@ -1360,7 +1660,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) {
+ if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0,
+ false, NULL, NULL, NULL) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
}
@@ -2967,6 +3268,7 @@ static int parse_station_flags(struct genl_info *info,
sta_flags = nla_data(nla);
params->sta_flags_mask = sta_flags->mask;
params->sta_flags_set = sta_flags->set;
+ params->sta_flags_set &= params->sta_flags_mask;
if ((params->sta_flags_mask |
params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID))
return -EINVAL;
@@ -3116,7 +3418,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
(u32)sinfo->rx_bytes))
goto nla_put_failure;
if ((sinfo->filled & (STATION_INFO_TX_BYTES |
- NL80211_STA_INFO_TX_BYTES64)) &&
+ STATION_INFO_TX_BYTES64)) &&
nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES,
(u32)sinfo->tx_bytes))
goto nla_put_failure;
@@ -3241,15 +3543,20 @@ static int nl80211_dump_station(struct sk_buff *skb,
{
struct station_info sinfo;
struct cfg80211_registered_device *dev;
- struct net_device *netdev;
+ struct wireless_dev *wdev;
u8 mac_addr[ETH_ALEN];
- int sta_idx = cb->args[1];
+ int sta_idx = cb->args[2];
int err;
- err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
+ err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
if (err)
return err;
+ if (!wdev->netdev) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
if (!dev->ops->dump_station) {
err = -EOPNOTSUPP;
goto out_err;
@@ -3257,7 +3564,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
while (1) {
memset(&sinfo, 0, sizeof(sinfo));
- err = rdev_dump_station(dev, netdev, sta_idx,
+ err = rdev_dump_station(dev, wdev->netdev, sta_idx,
mac_addr, &sinfo);
if (err == -ENOENT)
break;
@@ -3267,7 +3574,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
if (nl80211_send_station(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- dev, netdev, mac_addr,
+ dev, wdev->netdev, mac_addr,
&sinfo) < 0)
goto out;
@@ -3276,10 +3583,10 @@ static int nl80211_dump_station(struct sk_buff *skb,
out:
- cb->args[1] = sta_idx;
+ cb->args[2] = sta_idx;
err = skb->len;
out_err:
- nl80211_finish_netdev_dump(dev);
+ nl80211_finish_wdev_dump(dev);
return err;
}
@@ -3320,6 +3627,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
return genlmsg_reply(msg, info);
}
+int cfg80211_check_station_change(struct wiphy *wiphy,
+ struct station_parameters *params,
+ enum cfg80211_station_type statype)
+{
+ if (params->listen_interval != -1)
+ return -EINVAL;
+ if (params->aid)
+ return -EINVAL;
+
+ /* When you run into this, adjust the code below for the new flag */
+ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+
+ switch (statype) {
+ case CFG80211_STA_MESH_PEER_KERNEL:
+ case CFG80211_STA_MESH_PEER_USER:
+ /*
+ * No ignoring the TDLS flag here -- the userspace mesh
+ * code doesn't have the bug of including TDLS in the
+ * mask everywhere.
+ */
+ if (params->sta_flags_mask &
+ ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_MFP) |
+ BIT(NL80211_STA_FLAG_AUTHORIZED)))
+ return -EINVAL;
+ break;
+ case CFG80211_STA_TDLS_PEER_SETUP:
+ case CFG80211_STA_TDLS_PEER_ACTIVE:
+ if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
+ return -EINVAL;
+ /* ignore since it can't change */
+ params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+ break;
+ default:
+ /* disallow mesh-specific things */
+ if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
+ return -EINVAL;
+ if (params->local_pm)
+ return -EINVAL;
+ if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+ return -EINVAL;
+ }
+
+ if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
+ statype != CFG80211_STA_TDLS_PEER_ACTIVE) {
+ /* TDLS can't be set, ... */
+ if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+ return -EINVAL;
+ /*
+ * ... but don't bother the driver with it. This works around
+ * a hostapd/wpa_supplicant issue -- it always includes the
+ * TDLS_PEER flag in the mask even for AP mode.
+ */
+ params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+ }
+
+ if (statype != CFG80211_STA_TDLS_PEER_SETUP) {
+ /* reject other things that can't change */
+ if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
+ return -EINVAL;
+ if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY)
+ return -EINVAL;
+ if (params->supported_rates)
+ return -EINVAL;
+ if (params->ext_capab || params->ht_capa || params->vht_capa)
+ return -EINVAL;
+ }
+
+ if (statype != CFG80211_STA_AP_CLIENT) {
+ if (params->vlan)
+ return -EINVAL;
+ }
+
+ switch (statype) {
+ case CFG80211_STA_AP_MLME_CLIENT:
+ /* Use this only for authorizing/unauthorizing a station */
+ if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)))
+ return -EOPNOTSUPP;
+ break;
+ case CFG80211_STA_AP_CLIENT:
+ /* accept only the listed bits */
+ if (params->sta_flags_mask &
+ ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+ BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_ASSOCIATED) |
+ BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
+ BIT(NL80211_STA_FLAG_WME) |
+ BIT(NL80211_STA_FLAG_MFP)))
+ return -EINVAL;
+
+ /* but authenticated/associated only if driver handles it */
+ if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
+ params->sta_flags_mask &
+ (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_ASSOCIATED)))
+ return -EINVAL;
+ break;
+ case CFG80211_STA_IBSS:
+ case CFG80211_STA_AP_STA:
+ /* reject any changes other than AUTHORIZED */
+ if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
+ return -EINVAL;
+ break;
+ case CFG80211_STA_TDLS_PEER_SETUP:
+ /* reject any changes other than AUTHORIZED or WME */
+ if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+ BIT(NL80211_STA_FLAG_WME)))
+ return -EINVAL;
+ /* force (at least) rates when authorizing */
+ if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) &&
+ !params->supported_rates)
+ return -EINVAL;
+ break;
+ case CFG80211_STA_TDLS_PEER_ACTIVE:
+ /* reject any changes */
+ return -EINVAL;
+ case CFG80211_STA_MESH_PEER_KERNEL:
+ if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+ return -EINVAL;
+ break;
+ case CFG80211_STA_MESH_PEER_USER:
+ if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
+ return -EINVAL;
+ break;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(cfg80211_check_station_change);
+
/*
* Get vlan interface making sure it is running and on the right wiphy.
*/
@@ -3342,6 +3779,13 @@ static struct net_device *get_vlan(struct genl_info *info,
goto error;
}
+ if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+ v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+ v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
+ ret = -EINVAL;
+ goto error;
+ }
+
if (!netif_running(v)) {
ret = -ENETDOWN;
goto error;
@@ -3359,21 +3803,13 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = {
[NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
};
-static int nl80211_set_station_tdls(struct genl_info *info,
- struct station_parameters *params)
+static int nl80211_parse_sta_wme(struct genl_info *info,
+ struct station_parameters *params)
{
struct nlattr *tb[NL80211_STA_WME_MAX + 1];
struct nlattr *nla;
int err;
- /* Dummy STA entry gets updated once the peer capabilities are known */
- if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
- params->ht_capa =
- nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
- if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
- params->vht_capa =
- nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
-
/* parse WME attributes if present */
if (!info->attrs[NL80211_ATTR_STA_WME])
return 0;
@@ -3401,18 +3837,34 @@ static int nl80211_set_station_tdls(struct genl_info *info,
return 0;
}
+static int nl80211_set_station_tdls(struct genl_info *info,
+ struct station_parameters *params)
+{
+ /* Dummy STA entry gets updated once the peer capabilities are known */
+ if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
+ params->ht_capa =
+ nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
+ if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
+ params->vht_capa =
+ nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+
+ return nl80211_parse_sta_wme(info, params);
+}
+
static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
- int err;
struct net_device *dev = info->user_ptr[1];
struct station_parameters params;
- u8 *mac_addr = NULL;
+ u8 *mac_addr;
+ int err;
memset(&params, 0, sizeof(params));
params.listen_interval = -1;
- params.plink_state = -1;
+
+ if (!rdev->ops->change_station)
+ return -EOPNOTSUPP;
if (info->attrs[NL80211_ATTR_STA_AID])
return -EINVAL;
@@ -3445,19 +3897,23 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
return -EINVAL;
- if (!rdev->ops->change_station)
- return -EOPNOTSUPP;
-
if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
return -EINVAL;
- if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
+ if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
params.plink_action =
- nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+ nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+ if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
+ return -EINVAL;
+ }
- if (info->attrs[NL80211_ATTR_STA_PLINK_STATE])
+ if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {
params.plink_state =
- nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
+ nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
+ if (params.plink_state >= NUM_NL80211_PLINK_STATES)
+ return -EINVAL;
+ params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
+ }
if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) {
enum nl80211_mesh_power_mode pm = nla_get_u32(
@@ -3470,127 +3926,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
params.local_pm = pm;
}
+ /* Include parameters for TDLS peer (will check later) */
+ err = nl80211_set_station_tdls(info, &params);
+ if (err)
+ return err;
+
+ params.vlan = get_vlan(info, rdev);
+ if (IS_ERR(params.vlan))
+ return PTR_ERR(params.vlan);
+
switch (dev->ieee80211_ptr->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_P2P_GO:
- /* disallow mesh-specific things */
- if (params.plink_action)
- return -EINVAL;
- if (params.local_pm)
- return -EINVAL;
-
- /* TDLS can't be set, ... */
- if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
- return -EINVAL;
- /*
- * ... but don't bother the driver with it. This works around
- * a hostapd/wpa_supplicant issue -- it always includes the
- * TLDS_PEER flag in the mask even for AP mode.
- */
- params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
-
- /* accept only the listed bits */
- if (params.sta_flags_mask &
- ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
- BIT(NL80211_STA_FLAG_AUTHENTICATED) |
- BIT(NL80211_STA_FLAG_ASSOCIATED) |
- BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
- BIT(NL80211_STA_FLAG_WME) |
- BIT(NL80211_STA_FLAG_MFP)))
- return -EINVAL;
-
- /* but authenticated/associated only if driver handles it */
- if (!(rdev->wiphy.features &
- NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
- params.sta_flags_mask &
- (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
- BIT(NL80211_STA_FLAG_ASSOCIATED)))
- return -EINVAL;
-
- /* reject other things that can't change */
- if (params.supported_rates)
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
- info->attrs[NL80211_ATTR_VHT_CAPABILITY])
- return -EINVAL;
-
- /* must be last in here for error handling */
- params.vlan = get_vlan(info, rdev);
- if (IS_ERR(params.vlan))
- return PTR_ERR(params.vlan);
- break;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_STATION:
- /*
- * Don't allow userspace to change the TDLS_PEER flag,
- * but silently ignore attempts to change it since we
- * don't have state here to verify that it doesn't try
- * to change the flag.
- */
- params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
- /* Include parameters for TDLS peer (driver will check) */
- err = nl80211_set_station_tdls(info, &params);
- if (err)
- return err;
- /* disallow things sta doesn't support */
- if (params.plink_action)
- return -EINVAL;
- if (params.local_pm)
- return -EINVAL;
- /* reject any changes other than AUTHORIZED or WME (for TDLS) */
- if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
- BIT(NL80211_STA_FLAG_WME)))
- return -EINVAL;
- break;
case NL80211_IFTYPE_ADHOC:
- /* disallow things sta doesn't support */
- if (params.plink_action)
- return -EINVAL;
- if (params.local_pm)
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
- info->attrs[NL80211_ATTR_VHT_CAPABILITY])
- return -EINVAL;
- /* reject any changes other than AUTHORIZED */
- if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
- return -EINVAL;
- break;
case NL80211_IFTYPE_MESH_POINT:
- /* disallow things mesh doesn't support */
- if (params.vlan)
- return -EINVAL;
- if (params.supported_rates)
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
- info->attrs[NL80211_ATTR_VHT_CAPABILITY])
- return -EINVAL;
- /*
- * No special handling for TDLS here -- the userspace
- * mesh code doesn't have this bug.
- */
- if (params.sta_flags_mask &
- ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
- BIT(NL80211_STA_FLAG_MFP) |
- BIT(NL80211_STA_FLAG_AUTHORIZED)))
- return -EINVAL;
break;
default:
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto out_put_vlan;
}
- /* be aware of params.vlan when changing code here */
-
+ /* driver will call cfg80211_check_station_change() */
err = rdev_change_station(rdev, dev, mac_addr, &params);
+ out_put_vlan:
if (params.vlan)
dev_put(params.vlan);
@@ -3607,6 +3969,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
memset(&params, 0, sizeof(params));
+ if (!rdev->ops->add_station)
+ return -EOPNOTSUPP;
+
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
@@ -3652,50 +4017,32 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
params.vht_capa =
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
- if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
+ if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
params.plink_action =
- nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+ nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+ if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
+ return -EINVAL;
+ }
- if (!rdev->ops->add_station)
- return -EOPNOTSUPP;
+ err = nl80211_parse_sta_wme(info, &params);
+ if (err)
+ return err;
if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
return -EINVAL;
+ /* When you run into this, adjust the code below for the new flag */
+ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+
switch (dev->ieee80211_ptr->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_P2P_GO:
- /* parse WME attributes if sta is WME capable */
- if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
- (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) &&
- info->attrs[NL80211_ATTR_STA_WME]) {
- struct nlattr *tb[NL80211_STA_WME_MAX + 1];
- struct nlattr *nla;
-
- nla = info->attrs[NL80211_ATTR_STA_WME];
- err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
- nl80211_sta_wme_policy);
- if (err)
- return err;
-
- if (tb[NL80211_STA_WME_UAPSD_QUEUES])
- params.uapsd_queues =
- nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
- if (params.uapsd_queues &
- ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
- return -EINVAL;
-
- if (tb[NL80211_STA_WME_MAX_SP])
- params.max_sp =
- nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);
-
- if (params.max_sp &
- ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
- return -EINVAL;
+ /* ignore WME attributes if iface/sta is not capable */
+ if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) ||
+ !(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)))
+ params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
- params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;
- }
/* TDLS peers cannot be added */
if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
return -EINVAL;
@@ -3716,6 +4063,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
return PTR_ERR(params.vlan);
break;
case NL80211_IFTYPE_MESH_POINT:
+ /* ignore uAPSD data */
+ params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
+
/* associated is disallowed */
if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
return -EINVAL;
@@ -3724,8 +4074,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
break;
case NL80211_IFTYPE_STATION:
- /* associated is disallowed */
- if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
+ case NL80211_IFTYPE_P2P_CLIENT:
+ /* ignore uAPSD data */
+ params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
+
+ /* these are disallowed */
+ if (params.sta_flags_mask &
+ (BIT(NL80211_STA_FLAG_ASSOCIATED) |
+ BIT(NL80211_STA_FLAG_AUTHENTICATED)))
return -EINVAL;
/* Only TDLS peers can be added */
if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
@@ -3736,6 +4092,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
/* ... with external setup is supported */
if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))
return -EOPNOTSUPP;
+ /*
+ * Older wpa_supplicant versions always mark the TDLS peer
+ * as authorized, but it shouldn't yet be.
+ */
+ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED);
break;
default:
return -EOPNOTSUPP;
@@ -3829,13 +4190,13 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
{
struct mpath_info pinfo;
struct cfg80211_registered_device *dev;
- struct net_device *netdev;
+ struct wireless_dev *wdev;
u8 dst[ETH_ALEN];
u8 next_hop[ETH_ALEN];
- int path_idx = cb->args[1];
+ int path_idx = cb->args[2];
int err;
- err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
+ err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
if (err)
return err;
@@ -3844,14 +4205,14 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
goto out_err;
}
- if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
+ if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) {
err = -EOPNOTSUPP;
goto out_err;
}
while (1) {
- err = rdev_dump_mpath(dev, netdev, path_idx, dst, next_hop,
- &pinfo);
+ err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst,
+ next_hop, &pinfo);
if (err == -ENOENT)
break;
if (err)
@@ -3859,7 +4220,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- netdev, dst, next_hop,
+ wdev->netdev, dst, next_hop,
&pinfo) < 0)
goto out;
@@ -3868,10 +4229,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
out:
- cb->args[1] = path_idx;
+ cb->args[2] = path_idx;
err = skb->len;
out_err:
- nl80211_finish_netdev_dump(dev);
+ nl80211_finish_wdev_dump(dev);
return err;
}
@@ -4280,6 +4641,7 @@ static const struct nla_policy
[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
[NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
+ [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
[NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
@@ -4418,6 +4780,7 @@ do { \
static int nl80211_parse_mesh_setup(struct genl_info *info,
struct mesh_setup *setup)
{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1];
if (!info->attrs[NL80211_ATTR_MESH_SETUP])
@@ -4454,8 +4817,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
setup->ie = nla_data(ieattr);
setup->ie_len = nla_len(ieattr);
}
+ if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] &&
+ !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM))
+ return -EINVAL;
+ setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]);
setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]);
setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]);
+ if (setup->is_secure)
+ setup->user_mpm = true;
return 0;
}
@@ -5219,9 +5588,13 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
genl_dump_check_consistent(cb, hdr, &nl80211_fam);
- if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation) ||
+ if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
+ goto nla_put_failure;
+ if (wdev->netdev &&
nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex))
goto nla_put_failure;
+ if (nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)))
+ goto nla_put_failure;
bss = nla_nest_start(msg, NL80211_ATTR_BSS);
if (!bss)
@@ -5301,22 +5674,18 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
return -EMSGSIZE;
}
-static int nl80211_dump_scan(struct sk_buff *skb,
- struct netlink_callback *cb)
+static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
{
struct cfg80211_registered_device *rdev;
- struct net_device *dev;
struct cfg80211_internal_bss *scan;
struct wireless_dev *wdev;
- int start = cb->args[1], idx = 0;
+ int start = cb->args[2], idx = 0;
int err;
- err = nl80211_prepare_netdev_dump(skb, cb, &rdev, &dev);
+ err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (err)
return err;
- wdev = dev->ieee80211_ptr;
-
wdev_lock(wdev);
spin_lock_bh(&rdev->bss_lock);
cfg80211_bss_expire(rdev);
@@ -5337,8 +5706,8 @@ static int nl80211_dump_scan(struct sk_buff *skb,
spin_unlock_bh(&rdev->bss_lock);
wdev_unlock(wdev);
- cb->args[1] = idx;
- nl80211_finish_netdev_dump(rdev);
+ cb->args[2] = idx;
+ nl80211_finish_wdev_dump(rdev);
return skb->len;
}
@@ -5407,14 +5776,19 @@ static int nl80211_dump_survey(struct sk_buff *skb,
{
struct survey_info survey;
struct cfg80211_registered_device *dev;
- struct net_device *netdev;
- int survey_idx = cb->args[1];
+ struct wireless_dev *wdev;
+ int survey_idx = cb->args[2];
int res;
- res = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
+ res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
if (res)
return res;
+ if (!wdev->netdev) {
+ res = -EINVAL;
+ goto out_err;
+ }
+
if (!dev->ops->dump_survey) {
res = -EOPNOTSUPP;
goto out_err;
@@ -5423,7 +5797,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
while (1) {
struct ieee80211_channel *chan;
- res = rdev_dump_survey(dev, netdev, survey_idx, &survey);
+ res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey);
if (res == -ENOENT)
break;
if (res)
@@ -5445,17 +5819,16 @@ static int nl80211_dump_survey(struct sk_buff *skb,
if (nl80211_send_survey(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- netdev,
- &survey) < 0)
+ wdev->netdev, &survey) < 0)
goto out;
survey_idx++;
}
out:
- cb->args[1] = survey_idx;
+ cb->args[2] = survey_idx;
res = skb->len;
out_err:
- nl80211_finish_netdev_dump(dev);
+ nl80211_finish_wdev_dump(dev);
return res;
}
@@ -5663,14 +6036,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- struct cfg80211_crypto_settings crypto;
struct ieee80211_channel *chan;
- const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
- int err, ssid_len, ie_len = 0;
- bool use_mfp = false;
- u32 flags = 0;
- struct ieee80211_ht_cap *ht_capa = NULL;
- struct ieee80211_ht_cap *ht_capa_mask = NULL;
+ struct cfg80211_assoc_request req = {};
+ const u8 *bssid, *ssid;
+ int err, ssid_len = 0;
if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
return -EINVAL;
@@ -5698,41 +6067,58 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
if (info->attrs[NL80211_ATTR_IE]) {
- ie = nla_data(info->attrs[NL80211_ATTR_IE]);
- ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+ req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+ req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
}
if (info->attrs[NL80211_ATTR_USE_MFP]) {
enum nl80211_mfp mfp =
nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
if (mfp == NL80211_MFP_REQUIRED)
- use_mfp = true;
+ req.use_mfp = true;
else if (mfp != NL80211_MFP_NO)
return -EINVAL;
}
if (info->attrs[NL80211_ATTR_PREV_BSSID])
- prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
+ req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
- flags |= ASSOC_REQ_DISABLE_HT;
+ req.flags |= ASSOC_REQ_DISABLE_HT;
if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
- ht_capa_mask =
- nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]);
+ memcpy(&req.ht_capa_mask,
+ nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
+ sizeof(req.ht_capa_mask));
if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
- if (!ht_capa_mask)
+ if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+ return -EINVAL;
+ memcpy(&req.ht_capa,
+ nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
+ sizeof(req.ht_capa));
+ }
+
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
+ req.flags |= ASSOC_REQ_DISABLE_VHT;
+
+ if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
+ memcpy(&req.vht_capa_mask,
+ nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
+ sizeof(req.vht_capa_mask));
+
+ if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
+ if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
return -EINVAL;
- ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
+ memcpy(&req.vht_capa,
+ nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
+ sizeof(req.vht_capa));
}
- err = nl80211_crypto_settings(rdev, info, &crypto, 1);
+ err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
if (!err)
- err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
- ssid, ssid_len, ie, ie_len, use_mfp,
- &crypto, flags, ht_capa,
- ht_capa_mask);
+ err = cfg80211_mlme_assoc(rdev, dev, chan, bssid,
+ ssid, ssid_len, &req);
return err;
}
@@ -6312,6 +6698,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
sizeof(connect.ht_capa));
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
+ connect.flags |= ASSOC_REQ_DISABLE_VHT;
+
+ if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
+ memcpy(&connect.vht_capa_mask,
+ nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
+ sizeof(connect.vht_capa_mask));
+
+ if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
+ if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) {
+ kfree(connkeys);
+ return -EINVAL;
+ }
+ memcpy(&connect.vht_capa,
+ nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
+ sizeof(connect.vht_capa));
+ }
+
err = cfg80211_connect(rdev, dev, &connect, connkeys);
if (err)
kfree(connkeys);
@@ -7085,6 +7489,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ if (setup.user_mpm)
+ cfg.auto_open_plinks = false;
+
if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
err = nl80211_parse_chandef(rdev, info, &setup.chandef);
if (err)
@@ -7177,6 +7584,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg,
&tcp->payload_tok))
return -ENOBUFS;
+ nla_nest_end(msg, nl_tcp);
+
return 0;
}
@@ -7284,7 +7693,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
return -EINVAL;
if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) >
- rdev->wiphy.wowlan.tcp->data_interval_max)
+ rdev->wiphy.wowlan.tcp->data_interval_max ||
+ nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0)
return -EINVAL;
wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]);
@@ -7762,10 +8172,118 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->stop_p2p_device)
return -EOPNOTSUPP;
+ mutex_lock(&rdev->devlist_mtx);
mutex_lock(&rdev->sched_scan_mtx);
cfg80211_stop_p2p_device(rdev, wdev);
mutex_unlock(&rdev->sched_scan_mtx);
+ mutex_unlock(&rdev->devlist_mtx);
+
+ return 0;
+}
+
+static int nl80211_get_protocol_features(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ void *hdr;
+ struct sk_buff *msg;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+ NL80211_CMD_GET_PROTOCOL_FEATURES);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES,
+ NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+ nla_put_failure:
+ kfree_skb(msg);
+ return -ENOBUFS;
+}
+
+static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_update_ft_ies_params ft_params;
+ struct net_device *dev = info->user_ptr[1];
+
+ if (!rdev->ops->update_ft_ies)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_MDID] ||
+ !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
+ return -EINVAL;
+
+ memset(&ft_params, 0, sizeof(ft_params));
+ ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]);
+ ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+ ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+ return rdev_update_ft_ies(rdev, dev, &ft_params);
+}
+
+static int nl80211_crit_protocol_start(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+ enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC;
+ u16 duration;
+ int ret;
+
+ if (!rdev->ops->crit_proto_start)
+ return -EOPNOTSUPP;
+
+ if (WARN_ON(!rdev->ops->crit_proto_stop))
+ return -EINVAL;
+
+ if (rdev->crit_proto_nlportid)
+ return -EBUSY;
+
+ /* determine protocol if provided */
+ if (info->attrs[NL80211_ATTR_CRIT_PROT_ID])
+ proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]);
+
+ if (proto >= NUM_NL80211_CRIT_PROTO)
+ return -EINVAL;
+
+ /* timeout must be provided */
+ if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION])
+ return -EINVAL;
+
+ duration =
+ nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]);
+
+ if (duration > NL80211_CRIT_PROTO_MAX_DURATION)
+ return -ERANGE;
+
+ ret = rdev_crit_proto_start(rdev, wdev, proto, duration);
+ if (!ret)
+ rdev->crit_proto_nlportid = info->snd_portid;
+
+ return ret;
+}
+
+static int nl80211_crit_protocol_stop(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+
+ if (!rdev->ops->crit_proto_stop)
+ return -EOPNOTSUPP;
+
+ if (rdev->crit_proto_nlportid) {
+ rdev->crit_proto_nlportid = 0;
+ rdev_crit_proto_stop(rdev, wdev);
+ }
return 0;
}
@@ -8445,6 +8963,35 @@ static struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
+ .doit = nl80211_get_protocol_features,
+ .policy = nl80211_policy,
+ },
+ {
+ .cmd = NL80211_CMD_UPDATE_FT_IES,
+ .doit = nl80211_update_ft_ies,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_CRIT_PROTOCOL_START,
+ .doit = nl80211_crit_protocol_start,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
+ .doit = nl80211_crit_protocol_stop,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ }
};
static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -8472,7 +9019,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
if (!msg)
return;
- if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) {
+ if (nl80211_send_wiphy(rdev, msg, 0, 0, 0,
+ false, NULL, NULL, NULL) < 0) {
nlmsg_free(msg);
return;
}
@@ -8796,21 +9344,31 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
NL80211_CMD_DISASSOCIATE, gfp);
}
-void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *buf,
- size_t len, gfp_t gfp)
+void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
+ size_t len)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ trace_cfg80211_send_unprot_deauth(dev);
+ nl80211_send_mlme_event(rdev, dev, buf, len,
+ NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC);
}
+EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
-void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *buf,
- size_t len, gfp_t gfp)
+void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
+ size_t len)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_UNPROT_DISASSOCIATE, gfp);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ trace_cfg80211_send_unprot_disassoc(dev);
+ nl80211_send_mlme_event(rdev, dev, buf, len,
+ NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC);
}
+EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
struct net_device *netdev, int cmd,
@@ -9013,14 +9571,19 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
-void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- const u8 *macaddr, const u8* ie, u8 ie_len,
- gfp_t gfp)
+void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
+ const u8* ie, u8 ie_len, gfp_t gfp)
{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT))
+ return;
+
+ trace_cfg80211_notify_new_peer_candidate(dev, addr);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
if (!msg)
return;
@@ -9032,8 +9595,8 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
}
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
- nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
(ie_len && ie &&
nla_put(msg, NL80211_ATTR_IE, ie_len , ie)))
goto nla_put_failure;
@@ -9048,6 +9611,7 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *addr,
@@ -9116,7 +9680,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE);
if (!nl_freq)
goto nla_put_failure;
- if (nl80211_msg_put_channel(msg, channel_before))
+ if (nl80211_msg_put_channel(msg, channel_before, false))
goto nla_put_failure;
nla_nest_end(msg, nl_freq);
@@ -9124,7 +9688,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER);
if (!nl_freq)
goto nla_put_failure;
- if (nl80211_msg_put_channel(msg, channel_after))
+ if (nl80211_msg_put_channel(msg, channel_after, false))
goto nla_put_failure;
nla_nest_end(msg, nl_freq);
@@ -9186,31 +9750,42 @@ static void nl80211_send_remain_on_chan_event(
nlmsg_free(msg);
}
-void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev, u64 cookie,
- struct ieee80211_channel *chan,
- unsigned int duration, gfp_t gfp)
+void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
+ struct ieee80211_channel *chan,
+ unsigned int duration, gfp_t gfp)
{
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,
rdev, wdev, cookie, chan,
duration, gfp);
}
+EXPORT_SYMBOL(cfg80211_ready_on_channel);
-void nl80211_send_remain_on_channel_cancel(
- struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
- u64 cookie, struct ieee80211_channel *chan, gfp_t gfp)
+void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
+ struct ieee80211_channel *chan,
+ gfp_t gfp)
{
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
rdev, wdev, cookie, chan, 0, gfp);
}
+EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
-void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *mac_addr,
- struct station_info *sinfo, gfp_t gfp)
+void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
+ struct station_info *sinfo, gfp_t gfp)
{
+ struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct sk_buff *msg;
+ trace_cfg80211_new_sta(dev, mac_addr, sinfo);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
if (!msg)
return;
@@ -9224,14 +9799,17 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
nl80211_mlme_mcgrp.id, gfp);
}
+EXPORT_SYMBOL(cfg80211_new_sta);
-void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *mac_addr,
- gfp_t gfp)
+void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
{
+ struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct sk_buff *msg;
void *hdr;
+ trace_cfg80211_del_sta(dev, mac_addr);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
if (!msg)
return;
@@ -9256,12 +9834,14 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+EXPORT_SYMBOL(cfg80211_del_sta);
-void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *mac_addr,
- enum nl80211_connect_failed_reason reason,
- gfp_t gfp)
+void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
+ enum nl80211_connect_failed_reason reason,
+ gfp_t gfp)
{
+ struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct sk_buff *msg;
void *hdr;
@@ -9290,6 +9870,7 @@ void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+EXPORT_SYMBOL(cfg80211_conn_failed);
static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
const u8 *addr, gfp_t gfp)
@@ -9334,19 +9915,47 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
return true;
}
-bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
+bool cfg80211_rx_spurious_frame(struct net_device *dev,
+ const u8 *addr, gfp_t gfp)
{
- return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
- addr, gfp);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ bool ret;
+
+ trace_cfg80211_rx_spurious_frame(dev, addr);
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
+ wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
+ trace_cfg80211_return_bool(false);
+ return false;
+ }
+ ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
+ addr, gfp);
+ trace_cfg80211_return_bool(ret);
+ return ret;
}
+EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
-bool nl80211_unexpected_4addr_frame(struct net_device *dev,
- const u8 *addr, gfp_t gfp)
+bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
+ const u8 *addr, gfp_t gfp)
{
- return __nl80211_unexpected_frame(dev,
- NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
- addr, gfp);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ bool ret;
+
+ trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
+ wdev->iftype != NL80211_IFTYPE_P2P_GO &&
+ wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
+ trace_cfg80211_return_bool(false);
+ return false;
+ }
+ ret = __nl80211_unexpected_frame(dev,
+ NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
+ addr, gfp);
+ trace_cfg80211_return_bool(ret);
+ return ret;
}
+EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u32 nlportid,
@@ -9370,6 +9979,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
(netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
netdev->ifindex)) ||
+ nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) ||
nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) ||
(sig_dbm &&
nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
@@ -9386,15 +9996,17 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
return -ENOBUFS;
}
-void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev, u64 cookie,
- const u8 *buf, size_t len, bool ack,
- gfp_t gfp)
+void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
+ const u8 *buf, size_t len, bool ack, gfp_t gfp)
{
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct net_device *netdev = wdev->netdev;
struct sk_buff *msg;
void *hdr;
+ trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
if (!msg)
return;
@@ -9408,6 +10020,7 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
(netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
netdev->ifindex)) ||
+ nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) ||
nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) ||
(ack && nla_put_flag(msg, NL80211_ATTR_ACK)))
@@ -9422,17 +10035,21 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
-void
-nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- enum nl80211_cqm_rssi_threshold_event rssi_event,
- gfp_t gfp)
+void cfg80211_cqm_rssi_notify(struct net_device *dev,
+ enum nl80211_cqm_rssi_threshold_event rssi_event,
+ gfp_t gfp)
{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct sk_buff *msg;
struct nlattr *pinfoattr;
void *hdr;
+ trace_cfg80211_cqm_rssi_notify(dev, rssi_event);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
if (!msg)
return;
@@ -9444,7 +10061,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
}
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
- nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
@@ -9467,10 +10084,11 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
-void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *replay_ctr, gfp_t gfp)
+static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid,
+ const u8 *replay_ctr, gfp_t gfp)
{
struct sk_buff *msg;
struct nlattr *rekey_attr;
@@ -9512,9 +10130,22 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
-void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, int index,
- const u8 *bssid, bool preauth, gfp_t gfp)
+void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
+ const u8 *replay_ctr, gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ trace_cfg80211_gtk_rekey_notify(dev, bssid);
+ nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
+}
+EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);
+
+static void
+nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, int index,
+ const u8 *bssid, bool preauth, gfp_t gfp)
{
struct sk_buff *msg;
struct nlattr *attr;
@@ -9557,9 +10188,22 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
-void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- struct cfg80211_chan_def *chandef, gfp_t gfp)
+void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
+ const u8 *bssid, bool preauth, gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
+ nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
+}
+EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
+
+static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev,
+ struct cfg80211_chan_def *chandef,
+ gfp_t gfp)
{
struct sk_buff *msg;
void *hdr;
@@ -9591,11 +10235,36 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
-void
-nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *peer,
- u32 num_packets, u32 rate, u32 intvl, gfp_t gfp)
+void cfg80211_ch_switch_notify(struct net_device *dev,
+ struct cfg80211_chan_def *chandef)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ trace_cfg80211_ch_switch_notify(dev, chandef);
+
+ wdev_lock(wdev);
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
+ wdev->iftype != NL80211_IFTYPE_P2P_GO))
+ goto out;
+
+ wdev->channel = chandef->chan;
+ nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL);
+out:
+ wdev_unlock(wdev);
+ return;
+}
+EXPORT_SYMBOL(cfg80211_ch_switch_notify);
+
+void cfg80211_cqm_txe_notify(struct net_device *dev,
+ const u8 *peer, u32 num_packets,
+ u32 rate, u32 intvl, gfp_t gfp)
{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct sk_buff *msg;
struct nlattr *pinfoattr;
void *hdr;
@@ -9611,7 +10280,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
}
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
- nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))
goto nla_put_failure;
@@ -9640,6 +10309,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+EXPORT_SYMBOL(cfg80211_cqm_txe_notify);
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -9692,15 +10362,18 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
-void
-nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *peer,
- u32 num_packets, gfp_t gfp)
+void cfg80211_cqm_pktloss_notify(struct net_device *dev,
+ const u8 *peer, u32 num_packets, gfp_t gfp)
{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct sk_buff *msg;
struct nlattr *pinfoattr;
void *hdr;
+ trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
if (!msg)
return;
@@ -9712,7 +10385,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
}
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
- nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))
goto nla_put_failure;
@@ -9735,6 +10408,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
genlmsg_cancel(msg, hdr);
nlmsg_free(msg);
}
+EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
u64 cookie, bool acked, gfp_t gfp)
@@ -10021,6 +10695,89 @@ static struct notifier_block nl80211_netlink_notifier = {
.notifier_call = nl80211_netlink_notify,
};
+void cfg80211_ft_event(struct net_device *netdev,
+ struct cfg80211_ft_event_params *ft_event)
+{
+ struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+
+ trace_cfg80211_ft_event(wiphy, netdev, ft_event);
+
+ if (!ft_event->target_ap)
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap);
+ if (ft_event->ies)
+ nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies);
+ if (ft_event->ric_ies)
+ nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len,
+ ft_event->ric_ies);
+
+ err = genlmsg_end(msg, hdr);
+ if (err < 0) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+ nl80211_mlme_mcgrp.id, GFP_KERNEL);
+}
+EXPORT_SYMBOL(cfg80211_ft_event);
+
+void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
+{
+ struct cfg80211_registered_device *rdev;
+ struct sk_buff *msg;
+ void *hdr;
+ u32 nlportid;
+
+ rdev = wiphy_to_dev(wdev->wiphy);
+ if (!rdev->crit_proto_nlportid)
+ return;
+
+ nlportid = rdev->crit_proto_nlportid;
+ rdev->crit_proto_nlportid = 0;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
+ return;
+
+ nla_put_failure:
+ if (hdr)
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
+
+}
+EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
+
/* initialisation/exit functions */
int nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b061da4919e1..a4073e808c13 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -29,12 +29,6 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *buf, size_t len, gfp_t gfp);
-void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- const u8 *buf, size_t len, gfp_t gfp);
-void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- const u8 *buf, size_t len, gfp_t gfp);
void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *addr, gfp_t gfp);
@@ -54,10 +48,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap);
-void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- const u8 *macaddr, const u8* ie, u8 ie_len,
- gfp_t gfp);
void
nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *addr,
@@ -73,41 +63,10 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *bssid,
gfp_t gfp);
-void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev, u64 cookie,
- struct ieee80211_channel *chan,
- unsigned int duration, gfp_t gfp);
-void nl80211_send_remain_on_channel_cancel(
- struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
- u64 cookie, struct ieee80211_channel *chan, gfp_t gfp);
-
-void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *mac_addr,
- struct station_info *sinfo, gfp_t gfp);
-void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *mac_addr,
- gfp_t gfp);
-
-void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *mac_addr,
- enum nl80211_connect_failed_reason reason,
- gfp_t gfp);
-
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u32 nlpid,
int freq, int sig_dbm,
const u8 *buf, size_t len, gfp_t gfp);
-void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev, u64 cookie,
- const u8 *buf, size_t len, bool ack,
- gfp_t gfp);
-
-void
-nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev,
- enum nl80211_cqm_rssi_threshold_event rssi_event,
- gfp_t gfp);
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -115,31 +74,4 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
enum nl80211_radar_event event,
struct net_device *netdev, gfp_t gfp);
-void
-nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *peer,
- u32 num_packets, gfp_t gfp);
-
-void
-nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *peer,
- u32 num_packets, u32 rate, u32 intvl, gfp_t gfp);
-
-void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
- const u8 *replay_ctr, gfp_t gfp);
-
-void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, int index,
- const u8 *bssid, bool preauth, gfp_t gfp);
-
-void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_chan_def *chandef, gfp_t gfp);
-
-bool nl80211_unexpected_frame(struct net_device *dev,
- const u8 *addr, gfp_t gfp);
-bool nl80211_unexpected_4addr_frame(struct net_device *dev,
- const u8 *addr, gfp_t gfp);
-
#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 422d38291d66..9f15f0ac824d 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -6,11 +6,12 @@
#include "core.h"
#include "trace.h"
-static inline int rdev_suspend(struct cfg80211_registered_device *rdev)
+static inline int rdev_suspend(struct cfg80211_registered_device *rdev,
+ struct cfg80211_wowlan *wowlan)
{
int ret;
- trace_rdev_suspend(&rdev->wiphy, rdev->wowlan);
- ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
+ trace_rdev_suspend(&rdev->wiphy, wowlan);
+ ret = rdev->ops->suspend(&rdev->wiphy, wowlan);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -874,7 +875,7 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev,
trace_rdev_stop_p2p_device(&rdev->wiphy, wdev);
rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
trace_rdev_return_void(&rdev->wiphy);
-}
+}
static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
struct net_device *dev,
@@ -887,4 +888,39 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
+
+static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_update_ft_ies_params *ftie)
+{
+ int ret;
+
+ trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie);
+ ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ enum nl80211_crit_proto_id protocol,
+ u16 duration)
+{
+ int ret;
+
+ trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration);
+ ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev,
+ protocol, duration);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ trace_rdev_crit_proto_stop(&rdev->wiphy, wdev);
+ rdev->ops->crit_proto_stop(&rdev->wiphy, wdev);
+ trace_rdev_return_void(&rdev->wiphy);
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 98532c00242d..cc35fbaa4578 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -184,14 +184,14 @@ static const struct ieee80211_regdomain world_regdom = {
NL80211_RRF_NO_IBSS |
NL80211_RRF_NO_OFDM),
/* IEEE 802.11a, channel 36..48 */
- REG_RULE(5180-10, 5240+10, 40, 6, 20,
+ REG_RULE(5180-10, 5240+10, 80, 6, 20,
NL80211_RRF_PASSIVE_SCAN |
NL80211_RRF_NO_IBSS),
- /* NB: 5260 MHz - 5700 MHz requies DFS */
+ /* NB: 5260 MHz - 5700 MHz requires DFS */
/* IEEE 802.11a, channel 149..165 */
- REG_RULE(5745-10, 5825+10, 40, 6, 20,
+ REG_RULE(5745-10, 5825+10, 80, 6, 20,
NL80211_RRF_PASSIVE_SCAN |
NL80211_RRF_NO_IBSS),
@@ -855,7 +855,7 @@ static void handle_channel(struct wiphy *wiphy,
return;
REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq);
- chan->flags = IEEE80211_CHAN_DISABLED;
+ chan->flags |= IEEE80211_CHAN_DISABLED;
return;
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 482c70e70127..3ed35c345cae 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -160,7 +160,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
struct cfg80211_connect_params *params;
- const u8 *prev_bssid = NULL;
+ struct cfg80211_assoc_request req = {};
int err;
ASSERT_WDEV_LOCK(wdev);
@@ -187,16 +187,20 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
BUG_ON(!rdev->ops->assoc);
wdev->conn->state = CFG80211_CONN_ASSOCIATING;
if (wdev->conn->prev_bssid_valid)
- prev_bssid = wdev->conn->prev_bssid;
- err = __cfg80211_mlme_assoc(rdev, wdev->netdev,
- params->channel, params->bssid,
- prev_bssid,
- params->ssid, params->ssid_len,
- params->ie, params->ie_len,
- params->mfp != NL80211_MFP_NO,
- &params->crypto,
- params->flags, &params->ht_capa,
- &params->ht_capa_mask);
+ req.prev_bssid = wdev->conn->prev_bssid;
+ req.ie = params->ie;
+ req.ie_len = params->ie_len;
+ req.use_mfp = params->mfp != NL80211_MFP_NO;
+ req.crypto = params->crypto;
+ req.flags = params->flags;
+ req.ht_capa = params->ht_capa;
+ req.ht_capa_mask = params->ht_capa_mask;
+ req.vht_capa = params->vht_capa;
+ req.vht_capa_mask = params->vht_capa_mask;
+
+ err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel,
+ params->bssid, params->ssid,
+ params->ssid_len, &req);
if (err)
__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
NULL, 0,
@@ -227,12 +231,15 @@ void cfg80211_conn_work(struct work_struct *work)
mutex_lock(&rdev->sched_scan_mtx);
list_for_each_entry(wdev, &rdev->wdev_list, list) {
+ if (!wdev->netdev)
+ continue;
+
wdev_lock(wdev);
if (!netif_running(wdev->netdev)) {
wdev_unlock(wdev);
continue;
}
- if (wdev->sme_state != CFG80211_SME_CONNECTING) {
+ if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) {
wdev_unlock(wdev);
continue;
}
@@ -957,7 +964,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev,
/* was it connected by userspace SME? */
if (!wdev->conn) {
cfg80211_mlme_down(rdev, dev);
- return 0;
+ goto disconnect;
}
if (wdev->sme_state == CFG80211_SME_CONNECTING &&
@@ -983,6 +990,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev,
return err;
}
+ disconnect:
if (wdev->sme_state == CFG80211_SME_CONNECTED)
__cfg80211_disconnected(dev, NULL, 0, 0, false);
else if (wdev->sme_state == CFG80211_SME_CONNECTING)
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 238ee49b3868..8f28b9f798d8 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -83,6 +83,14 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
return 0;
}
+static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
+{
+ struct wireless_dev *wdev;
+
+ list_for_each_entry(wdev, &rdev->wdev_list, list)
+ cfg80211_leave(rdev, wdev);
+}
+
static int wiphy_suspend(struct device *dev, pm_message_t state)
{
struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
@@ -90,12 +98,19 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
rdev->suspend_at = get_seconds();
- if (rdev->ops->suspend) {
- rtnl_lock();
- if (rdev->wiphy.registered)
- ret = rdev_suspend(rdev);
- rtnl_unlock();
+ rtnl_lock();
+ if (rdev->wiphy.registered) {
+ if (!rdev->wowlan)
+ cfg80211_leave_all(rdev);
+ if (rdev->ops->suspend)
+ ret = rdev_suspend(rdev, rdev->wowlan);
+ if (ret == 1) {
+ /* Driver refuse to configure wowlan */
+ cfg80211_leave_all(rdev);
+ ret = rdev_suspend(rdev, NULL);
+ }
}
+ rtnl_unlock();
return ret;
}
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 7586de77a2f8..5755bc14abbd 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1786,6 +1786,61 @@ TRACE_EVENT(rdev_set_mac_acl,
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy)
);
+TRACE_EVENT(rdev_update_ft_ies,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_update_ft_ies_params *ftie),
+ TP_ARGS(wiphy, netdev, ftie),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(u16, md)
+ __dynamic_array(u8, ie, ftie->ie_len)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->md = ftie->md;
+ memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md)
+);
+
+TRACE_EVENT(rdev_crit_proto_start,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ enum nl80211_crit_proto_id protocol, u16 duration),
+ TP_ARGS(wiphy, wdev, protocol, duration),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ __field(u16, proto)
+ __field(u16, duration)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ __entry->proto = protocol;
+ __entry->duration = duration;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u",
+ WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration)
+);
+
+TRACE_EVENT(rdev_crit_proto_stop,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+ WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
@@ -2386,6 +2441,7 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup,
TP_STRUCT__entry(
WIPHY_ENTRY
WDEV_ENTRY
+ __field(bool, non_wireless)
__field(bool, disconnect)
__field(bool, magic_pkt)
__field(bool, gtk_rekey_failure)
@@ -2394,26 +2450,54 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup,
__field(bool, rfkill_release)
__field(s32, pattern_idx)
__field(u32, packet_len)
- __dynamic_array(u8, packet, wakeup->packet_present_len)
+ __dynamic_array(u8, packet,
+ wakeup ? wakeup->packet_present_len : 0)
),
TP_fast_assign(
WIPHY_ASSIGN;
WDEV_ASSIGN;
- __entry->disconnect = wakeup->disconnect;
- __entry->magic_pkt = wakeup->magic_pkt;
- __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure;
- __entry->eap_identity_req = wakeup->eap_identity_req;
- __entry->four_way_handshake = wakeup->four_way_handshake;
- __entry->rfkill_release = wakeup->rfkill_release;
- __entry->pattern_idx = wakeup->pattern_idx;
- __entry->packet_len = wakeup->packet_len;
- if (wakeup->packet && wakeup->packet_present_len)
+ __entry->non_wireless = !wakeup;
+ __entry->disconnect = wakeup ? wakeup->disconnect : false;
+ __entry->magic_pkt = wakeup ? wakeup->magic_pkt : false;
+ __entry->gtk_rekey_failure = wakeup ? wakeup->gtk_rekey_failure : false;
+ __entry->eap_identity_req = wakeup ? wakeup->eap_identity_req : false;
+ __entry->four_way_handshake = wakeup ? wakeup->four_way_handshake : false;
+ __entry->rfkill_release = wakeup ? wakeup->rfkill_release : false;
+ __entry->pattern_idx = wakeup ? wakeup->pattern_idx : false;
+ __entry->packet_len = wakeup ? wakeup->packet_len : false;
+ if (wakeup && wakeup->packet && wakeup->packet_present_len)
memcpy(__get_dynamic_array(packet), wakeup->packet,
wakeup->packet_present_len);
),
TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
);
+TRACE_EVENT(cfg80211_ft_event,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_ft_event_params *ft_event),
+ TP_ARGS(wiphy, netdev, ft_event),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __dynamic_array(u8, ies, ft_event->ies_len)
+ MAC_ENTRY(target_ap)
+ __dynamic_array(u8, ric_ies, ft_event->ric_ies_len)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ if (ft_event->ies)
+ memcpy(__get_dynamic_array(ies), ft_event->ies,
+ ft_event->ies_len);
+ MAC_ASSIGN(target_ap, ft_event->target_ap);
+ if (ft_event->ric_ies)
+ memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies,
+ ft_event->ric_ies_len);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 37a56ee1e1ed..f5ad4d94ba88 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
encaps_data = bridge_tunnel_header;
encaps_len = sizeof(bridge_tunnel_header);
skip_header_bytes -= 2;
- } else if (ethertype > 0x600) {
+ } else if (ethertype >= ETH_P_802_3_MIN) {
encaps_data = rfc1042_header;
encaps_len = sizeof(rfc1042_header);
skip_header_bytes -= 2;
@@ -1155,6 +1155,26 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
}
EXPORT_SYMBOL(cfg80211_get_p2p_attr);
+bool ieee80211_operating_class_to_band(u8 operating_class,
+ enum ieee80211_band *band)
+{
+ switch (operating_class) {
+ case 112:
+ case 115 ... 127:
+ *band = IEEE80211_BAND_5GHZ;
+ return true;
+ case 81:
+ case 82:
+ case 83:
+ case 84:
+ *band = IEEE80211_BAND_2GHZ;
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(ieee80211_operating_class_to_band);
+
int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
u32 beacon_int)
{
@@ -1258,12 +1278,12 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
if (wdev_iter == wdev)
continue;
- if (wdev_iter->netdev) {
- if (!netif_running(wdev_iter->netdev))
- continue;
- } else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
+ if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
if (!wdev_iter->p2p_started)
continue;
+ } else if (wdev_iter->netdev) {
+ if (!netif_running(wdev_iter->netdev))
+ continue;
} else {
WARN_ON(1);
}
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 2ffde4631ae2..0917f047f2cf 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -187,7 +187,6 @@ static int x25_seq_forward_open(struct inode *inode, struct file *file)
}
static const struct file_operations x25_seq_socket_fops = {
- .owner = THIS_MODULE,
.open = x25_seq_socket_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -195,7 +194,6 @@ static const struct file_operations x25_seq_socket_fops = {
};
static const struct file_operations x25_seq_route_fops = {
- .owner = THIS_MODULE,
.open = x25_seq_route_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -203,55 +201,38 @@ static const struct file_operations x25_seq_route_fops = {
};
static const struct file_operations x25_seq_forward_fops = {
- .owner = THIS_MODULE,
.open = x25_seq_forward_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
-static struct proc_dir_entry *x25_proc_dir;
-
int __init x25_proc_init(void)
{
- struct proc_dir_entry *p;
- int rc = -ENOMEM;
+ if (!proc_mkdir("x25", init_net.proc_net))
+ return -ENOMEM;
- x25_proc_dir = proc_mkdir("x25", init_net.proc_net);
- if (!x25_proc_dir)
+ if (!proc_create("x25/route", S_IRUGO, init_net.proc_net,
+ &x25_seq_route_fops))
goto out;
- p = proc_create("route", S_IRUGO, x25_proc_dir, &x25_seq_route_fops);
- if (!p)
- goto out_route;
-
- p = proc_create("socket", S_IRUGO, x25_proc_dir, &x25_seq_socket_fops);
- if (!p)
- goto out_socket;
+ if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net,
+ &x25_seq_socket_fops))
+ goto out;
- p = proc_create("forward", S_IRUGO, x25_proc_dir,
- &x25_seq_forward_fops);
- if (!p)
- goto out_forward;
- rc = 0;
+ if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net,
+ &x25_seq_forward_fops))
+ goto out;
+ return 0;
out:
- return rc;
-out_forward:
- remove_proc_entry("socket", x25_proc_dir);
-out_socket:
- remove_proc_entry("route", x25_proc_dir);
-out_route:
- remove_proc_entry("x25", init_net.proc_net);
- goto out;
+ remove_proc_subtree("x25", init_net.proc_net);
+ return -ENOMEM;
}
void __exit x25_proc_exit(void)
{
- remove_proc_entry("forward", x25_proc_dir);
- remove_proc_entry("route", x25_proc_dir);
- remove_proc_entry("socket", x25_proc_dir);
- remove_proc_entry("x25", init_net.proc_net);
+ remove_proc_subtree("x25", init_net.proc_net);
}
#else /* CONFIG_PROC_FS */
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6fb9d00a75dc..ab4ef72f0b1d 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -311,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = {
.sadb_alg_maxbits = 128
}
},
+{
+ /* rfc4494 */
+ .name = "cmac(aes)",
+
+ .uinfo = {
+ .auth = {
+ .icv_truncbits = 96,
+ .icv_fullbits = 128,
+ }
+ },
+
+ .pfkey_supported = 0,
+},
};
static struct xfrm_algo_desc ealg_list[] = {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index bcfda8921b5b..0cf003dfa8fc 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -64,6 +64,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
+ err = -EINVAL;
goto error;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 167c67d46c6a..ea970b8002a2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1037,6 +1037,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}
+static int flow_to_policy_dir(int dir)
+{
+ if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+ XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+ XFRM_POLICY_FWD == FLOW_DIR_FWD)
+ return dir;
+
+ switch (dir) {
+ default:
+ case FLOW_DIR_IN:
+ return XFRM_POLICY_IN;
+ case FLOW_DIR_OUT:
+ return XFRM_POLICY_OUT;
+ case FLOW_DIR_FWD:
+ return XFRM_POLICY_FWD;
+ }
+}
+
static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1046,7 +1064,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
if (old_obj)
xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
- pol = __xfrm_policy_lookup(net, fl, family, dir);
+ pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
if (IS_ERR_OR_NULL(pol))
return ERR_CAST(pol);
@@ -1932,7 +1950,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
* previous cache entry */
if (xdst == NULL) {
num_pols = 1;
- pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+ pols[0] = __xfrm_policy_lookup(net, fl, family,
+ flow_to_policy_dir(dir));
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@@ -2538,11 +2557,12 @@ static void __xfrm_garbage_collect(struct net *net)
}
}
-static void xfrm_garbage_collect(struct net *net)
+void xfrm_garbage_collect(struct net *net)
{
flow_cache_flush();
__xfrm_garbage_collect(net);
}
+EXPORT_SYMBOL(xfrm_garbage_collect);
static void xfrm_garbage_collect_deferred(struct net *net)
{
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2c341bdaf47c..78f66fa92449 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
goto error;
x->props.flags = orig->props.flags;
+ x->props.extra_flags = orig->props.extra_flags;
x->curlft.add_time = orig->curlft.add_time;
x->km.state = orig->km.state;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fbd9e6cd0fd7..3f565e495ac6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
copy_from_user_state(x, p);
+ if (attrs[XFRMA_SA_EXTRA_FLAGS])
+ x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
if ((err = attach_aead(&x->aead, &x->props.ealgo,
attrs[XFRMA_ALG_AEAD])))
goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
copy_to_user_state(x, p);
+ if (x->props.extra_flags) {
+ ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
+ x->props.extra_flags);
+ if (ret)
+ goto out;
+ }
+
if (x->coaddr) {
ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
if (ret)
@@ -1671,6 +1681,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
out:
xfrm_pol_put(xp);
+ if (delete && err == 0)
+ xfrm_garbage_collect(net);
return err;
}
@@ -2302,9 +2314,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) },
[XFRMA_TFCPAD] = { .type = NLA_U32 },
[XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
+ [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
};
-static struct xfrm_link {
+static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
@@ -2338,7 +2351,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *attrs[XFRMA_MAX+1];
- struct xfrm_link *link;
+ const struct xfrm_link *link;
int type, err;
type = nlh->nlmsg_type;
@@ -2495,6 +2508,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
x->security->ctx_len);
if (x->coaddr)
l += nla_total_size(sizeof(*x->coaddr));
+ if (x->props.extra_flags)
+ l += nla_total_size(sizeof(x->props.extra_flags));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size(sizeof(u64));